Skip to content

Commit cee652e

Browse files
authored
change(loongsuite-instrumentation-langchain): improve token usage extraction & context propagation (#148)
1 parent 15d683f commit cee652e

File tree

4 files changed

+178
-6
lines changed

4 files changed

+178
-6
lines changed

instrumentation-loongsuite/loongsuite-instrumentation-langchain/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## Unreleased
99

10+
### Changed
11+
12+
- Set `run_inline = True` on the tracer so LangChain callbacks run inline for correct OpenTelemetry context propagation
13+
([#148](https://github.com/alibaba/loongsuite-python-agent/pull/148))
14+
- Improved token usage extraction to support multiple LangChain/LLM provider formats
15+
([#148](https://github.com/alibaba/loongsuite-python-agent/pull/148))
16+
1017
## Version 0.2.0 (2026-03-12)
1118

1219
### Added

instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_tracer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ def __init__(
168168
**kwargs: Any,
169169
) -> None:
170170
super().__init__(_schema_format="original+chat", **kwargs)
171+
# We need to run callbacks inline so that the context is propagated correctly.
172+
self.run_inline = True
171173
self._handler = handler
172174
self._tracer = get_tracer(
173175
__name__,

instrumentation-loongsuite/loongsuite-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/internal/_utils.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -318,9 +318,15 @@ def _parse_token_usage_dict(token_usage: Any) -> tuple[int | None, int | None]:
318318
"""Parse a token_usage/usage dict into (input_tokens, output_tokens)."""
319319
if not isinstance(token_usage, dict):
320320
return None, None
321-
inp = token_usage.get("prompt_tokens") or token_usage.get("input_tokens")
322-
out = token_usage.get("completion_tokens") or token_usage.get(
323-
"output_tokens"
321+
inp = (
322+
token_usage.get("prompt_tokens")
323+
or token_usage.get("PromptTokens")
324+
or token_usage.get("input_tokens")
325+
)
326+
out = (
327+
token_usage.get("completion_tokens")
328+
or token_usage.get("CompletionTokens")
329+
or token_usage.get("output_tokens")
324330
)
325331
return (
326332
int(inp) if inp is not None else None,
@@ -334,7 +340,8 @@ def _extract_token_usage(run: Any) -> tuple[int | None, int | None]:
334340
Tries multiple LangChain formats in order:
335341
1. outputs["llm_output"]["token_usage"] or ["usage"]
336342
2. generations[i][j]["generation_info"]["token_usage"] or ["usage"]
337-
3. generations[i][j]["message"].response_metadata or ["kwargs"]["response_metadata"]
343+
3. generations[i][j]["message"].response_metadata["token_usage"] or generations[i][j]["message"].response_metadata["usage"] or generations[i][j]["message"]["kwargs"]["response_metadata"]["token_usage"] or generations[i][j]["message"]["kwargs"]["response_metadata"]["usage"]
344+
4. generations[i][j]["message"].usage_metadata or generations[i][j]["message"]["kwargs"]["usage_metadata"]
338345
"""
339346
outputs = getattr(run, "outputs", None) or {}
340347

@@ -348,7 +355,8 @@ def _extract_token_usage(run: Any) -> tuple[int | None, int | None]:
348355
return inp, out
349356

350357
# 2. Fallback: generations[][].generation_info["token_usage"] or ["usage"]
351-
# 3. Fallback: generations[][].message.response_metadata["token_usage"]
358+
# 3. Fallback: generations[][].message.response_metadata["token_usage"] or generations[][].message.response_metadata["usage"] or generations[][].message["kwargs"]["response_metadata"]["token_usage"] or generations[][].message["kwargs"]["response_metadata"]["usage"]
359+
# 4. Fallback: generations[][].message.usage_metadata or generations[][].message["kwargs"]["usage_metadata"]
352360
for gen_list in outputs.get("generations") or []:
353361
if not isinstance(gen_list, list):
354362
continue
@@ -363,10 +371,10 @@ def _extract_token_usage(run: Any) -> tuple[int | None, int | None]:
363371
inp, out = _parse_token_usage_dict(token_usage)
364372
if inp is not None or out is not None:
365373
return inp, out
366-
# Try message.response_metadata (serialized: kwargs.response_metadata)
367374
msg = gen.get("message")
368375
if msg is None:
369376
continue
377+
# Try message.response_metadata (serialized: kwargs.response_metadata)
370378
if isinstance(msg, dict):
371379
metadata = (msg.get("kwargs") or {}).get(
372380
"response_metadata"
@@ -380,6 +388,16 @@ def _extract_token_usage(run: Any) -> tuple[int | None, int | None]:
380388
inp, out = _parse_token_usage_dict(token_usage)
381389
if inp is not None or out is not None:
382390
return inp, out
391+
# Try message.usage_metadata (serialized: kwargs.usage_metadata)
392+
if isinstance(msg, dict):
393+
metadata = (msg.get("kwargs") or {}).get(
394+
"usage_metadata"
395+
) or {}
396+
else:
397+
metadata = getattr(msg, "usage_metadata", None) or {}
398+
inp, out = _parse_token_usage_dict(metadata)
399+
if inp is not None or out is not None:
400+
return inp, out
383401

384402
return None, None
385403

instrumentation-loongsuite/loongsuite-instrumentation-langchain/tests/test_data_extraction.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
_extract_response_model,
2525
_extract_token_usage,
2626
_extract_tool_definitions,
27+
_parse_token_usage_dict,
2728
_safe_json,
2829
)
2930
from opentelemetry.util.genai.types import (
@@ -298,6 +299,56 @@ def test_empty_outputs(self):
298299
assert _extract_llm_output_messages(run) == []
299300

300301

302+
class TestParseTokenUsageDict:
303+
"""Unit tests for _parse_token_usage_dict."""
304+
305+
def test_prompt_completion_tokens(self):
306+
"""Standard OpenAI format: prompt_tokens, completion_tokens."""
307+
inp, out = _parse_token_usage_dict(
308+
{"prompt_tokens": 10, "completion_tokens": 20}
309+
)
310+
assert inp == 10
311+
assert out == 20
312+
313+
def test_input_output_tokens(self):
314+
"""Anthropic/Claude format: input_tokens, output_tokens."""
315+
inp, out = _parse_token_usage_dict(
316+
{"input_tokens": 5, "output_tokens": 15}
317+
)
318+
assert inp == 5
319+
assert out == 15
320+
321+
def test_azure_format_pascal_case(self):
322+
"""Azure format: PromptTokens, CompletionTokens (PascalCase)."""
323+
inp, out = _parse_token_usage_dict(
324+
{"PromptTokens": 100, "CompletionTokens": 50}
325+
)
326+
assert inp == 100
327+
assert out == 50
328+
329+
def test_partial_input_only(self):
330+
"""Only input tokens present, output is None."""
331+
inp, out = _parse_token_usage_dict({"prompt_tokens": 42})
332+
assert inp == 42
333+
assert out is None
334+
335+
def test_partial_output_only(self):
336+
"""Only output tokens present, input is None."""
337+
inp, out = _parse_token_usage_dict({"completion_tokens": 8})
338+
assert inp is None
339+
assert out == 8
340+
341+
def test_non_dict_returns_none(self):
342+
"""Non-dict input returns (None, None)."""
343+
assert _parse_token_usage_dict(None) == (None, None)
344+
assert _parse_token_usage_dict("not a dict") == (None, None)
345+
assert _parse_token_usage_dict([]) == (None, None)
346+
347+
def test_empty_dict(self):
348+
"""Empty dict returns (None, None)."""
349+
assert _parse_token_usage_dict({}) == (None, None)
350+
351+
301352
class TestExtractTokenUsage:
302353
def test_from_llm_output(self):
303354
run = _FakeRun(
@@ -459,6 +510,100 @@ def test_llm_output_takes_precedence(self):
459510
assert inp == 1
460511
assert out == 2
461512

513+
def test_from_message_usage_metadata_dict(self):
514+
"""Token usage may be in message.kwargs.usage_metadata (serialized format)."""
515+
run = _FakeRun(
516+
outputs={
517+
"generations": [
518+
[
519+
{
520+
"text": "Response",
521+
"message": {
522+
"kwargs": {
523+
"content": "Response",
524+
"usage_metadata": {
525+
"input_tokens": 30,
526+
"output_tokens": 12,
527+
},
528+
}
529+
},
530+
}
531+
]
532+
]
533+
}
534+
)
535+
inp, out = _extract_token_usage(run)
536+
assert inp == 30
537+
assert out == 12
538+
539+
def test_from_message_usage_metadata_object(self):
540+
"""Token usage may be in message.usage_metadata (object format)."""
541+
542+
class _FakeMessage:
543+
usage_metadata = {
544+
"prompt_tokens": 80,
545+
"completion_tokens": 20,
546+
}
547+
548+
run = _FakeRun(
549+
outputs={
550+
"generations": [
551+
[
552+
{
553+
"text": "Response",
554+
"message": _FakeMessage(),
555+
}
556+
]
557+
]
558+
}
559+
)
560+
inp, out = _extract_token_usage(run)
561+
assert inp == 80
562+
assert out == 20
563+
564+
def test_llm_output_azure_format(self):
565+
"""llm_output may use Azure-style PromptTokens/CompletionTokens."""
566+
run = _FakeRun(
567+
outputs={
568+
"llm_output": {
569+
"token_usage": {
570+
"PromptTokens": 200,
571+
"CompletionTokens": 75,
572+
}
573+
}
574+
}
575+
)
576+
inp, out = _extract_token_usage(run)
577+
assert inp == 200
578+
assert out == 75
579+
580+
def test_response_metadata_usage_key(self):
581+
"""response_metadata may use 'usage' key instead of 'token_usage'."""
582+
run = _FakeRun(
583+
outputs={
584+
"generations": [
585+
[
586+
{
587+
"text": "Hi",
588+
"message": {
589+
"kwargs": {
590+
"response_metadata": {
591+
"usage": {
592+
"prompt_tokens": 15,
593+
"completion_tokens": 7,
594+
}
595+
},
596+
}
597+
},
598+
}
599+
]
600+
]
601+
}
602+
)
603+
inp, out = _extract_token_usage(run)
604+
assert inp == 15
605+
assert out == 7
606+
462607
def test_no_token_usage(self):
463608
run = _FakeRun(outputs={})
464609
inp, out = _extract_token_usage(run)

0 commit comments

Comments
 (0)