
Commit 9383b78

MshariAlaeen and mdrxy authored
feat(groq): add prompt caching token usage details (#33708)
**Description:** Adds support for prompt caching usage metadata in ChatGroq. The integration now captures cached token information from the Groq API response and includes it in the `input_token_details` field of the `usage_metadata`.

Changes:
- Created a new `_create_usage_metadata()` helper function to centralize usage metadata creation logic
- Extracts `cached_tokens` from `prompt_tokens_details` in API responses and maps it to `input_token_details.cache_read`
- Integrated the helper function in both the streaming (`_convert_chunk_to_message_chunk`) and non-streaming (`_create_chat_result`) code paths
- Added comprehensive unit tests to verify caching metadata handling and backward compatibility

This enables users to monitor prompt caching effectiveness when using Groq models with prompt caching enabled.

**Issue:** N/A

**Dependencies:** None

---------

Co-authored-by: Mason Daugherty <[email protected]>
Co-authored-by: Mason Daugherty <[email protected]>
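For reviewers, a minimal sketch of how the new metadata surfaces to callers; the model name and token counts below are illustrative assumptions, not values from this diff:

```python
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama-3.3-70b-versatile")  # example model choice
response = llm.invoke("Hello!")

# When Groq reports cached prompt tokens, they now surface under
# input_token_details (counts below are made up for illustration):
# {'input_tokens': 2006, 'output_tokens': 42, 'total_tokens': 2048,
#  'input_token_details': {'cache_read': 1920}}
print(response.usage_metadata)
```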
1 parent 3c49257 commit 9383b78

File tree

3 files changed: +738 −20 lines changed

libs/partners/groq/langchain_groq/chat_models.py

Lines changed: 77 additions & 17 deletions
@@ -37,6 +37,11 @@
     ToolMessage,
     ToolMessageChunk,
 )
+from langchain_core.messages.ai import (
+    InputTokenDetails,
+    OutputTokenDetails,
+    UsageMetadata,
+)
 from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser
 from langchain_core.output_parsers.base import OutputParserLike
 from langchain_core.output_parsers.openai_tools import (
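For context, the newly imported names are TypedDicts from `langchain_core.messages.ai`. A paraphrased sketch of the fields this diff relies on (see the library source for the authoritative definitions, which include more fields and required keys):

```python
from typing import TypedDict

# Paraphrased, partial shapes -- not the authoritative langchain_core definitions.
class InputTokenDetails(TypedDict, total=False):
    cache_read: int  # input tokens served from the provider's prompt cache

class OutputTokenDetails(TypedDict, total=False):
    reasoning: int  # output tokens spent on reasoning, when reported

class UsageMetadata(TypedDict, total=False):
    input_tokens: int
    output_tokens: int
    total_tokens: int
    input_token_details: InputTokenDetails
    output_token_details: OutputTokenDetails
```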
@@ -726,15 +731,7 @@ def _create_chat_result(
         for res in response["choices"]:
             message = _convert_dict_to_message(res["message"])
             if token_usage and isinstance(message, AIMessage):
-                input_tokens = token_usage.get("prompt_tokens", 0)
-                output_tokens = token_usage.get("completion_tokens", 0)
-                message.usage_metadata = {
-                    "input_tokens": input_tokens,
-                    "output_tokens": output_tokens,
-                    "total_tokens": token_usage.get(
-                        "total_tokens", input_tokens + output_tokens
-                    ),
-                }
+                message.usage_metadata = _create_usage_metadata(token_usage)
             generation_info = {"finish_reason": res.get("finish_reason")}
             if "logprobs" in res:
                 generation_info["logprobs"] = res["logprobs"]
@@ -774,7 +771,20 @@ def _combine_llm_outputs(self, llm_outputs: list[dict | None]) -> dict:
             if token_usage is not None:
                 for k, v in token_usage.items():
                     if k in overall_token_usage and v is not None:
-                        overall_token_usage[k] += v
+                        # Handle nested dictionaries
+                        if isinstance(v, dict):
+                            if k not in overall_token_usage:
+                                overall_token_usage[k] = {}
+                            for nested_k, nested_v in v.items():
+                                if (
+                                    nested_k in overall_token_usage[k]
+                                    and nested_v is not None
+                                ):
+                                    overall_token_usage[k][nested_k] += nested_v
+                                else:
+                                    overall_token_usage[k][nested_k] = nested_v
+                        else:
+                            overall_token_usage[k] += v
                     else:
                         overall_token_usage[k] = v
             if system_fingerprint is None:
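A standalone sketch of the nested-merge behavior added here, with invented numbers showing how per-key details accumulate across outputs:

```python
def merge_token_usage(overall: dict, new: dict) -> dict:
    """Paraphrase of the merge loop above, outside the class for illustration."""
    for k, v in new.items():
        if k in overall and v is not None:
            if isinstance(v, dict):  # nested details, e.g. prompt_tokens_details
                for nested_k, nested_v in v.items():
                    if nested_k in overall[k] and nested_v is not None:
                        overall[k][nested_k] += nested_v
                    else:
                        overall[k][nested_k] = nested_v
            else:
                overall[k] += v
        else:
            overall[k] = v
    return overall

merged = merge_token_usage(
    {"prompt_tokens": 100, "prompt_tokens_details": {"cached_tokens": 80}},
    {"prompt_tokens": 50, "prompt_tokens_details": {"cached_tokens": 40}},
)
print(merged)
# {'prompt_tokens': 150, 'prompt_tokens_details': {'cached_tokens': 120}}
```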
@@ -1329,13 +1339,7 @@ def _convert_chunk_to_message_chunk(
             {k: executed_tool[k] for k in executed_tool if k != "output"}
         )
     if usage := (chunk.get("x_groq") or {}).get("usage"):
-        input_tokens = usage.get("prompt_tokens", 0)
-        output_tokens = usage.get("completion_tokens", 0)
-        usage_metadata = {
-            "input_tokens": input_tokens,
-            "output_tokens": output_tokens,
-            "total_tokens": usage.get("total_tokens", input_tokens + output_tokens),
-        }
+        usage_metadata = _create_usage_metadata(usage)
     else:
         usage_metadata = None
     return AIMessageChunk(
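In the streaming path, usage rides on the final chunk's `x_groq` payload; a hedged sketch of consuming it, assuming the Groq endpoint includes usage in the stream and reusing the `llm` from the earlier example:

```python
final_usage = None
for chunk in llm.stream("Summarize prompt caching in one line."):
    if chunk.usage_metadata:  # typically only the last chunk carries usage
        final_usage = chunk.usage_metadata
print(final_usage)  # includes input_token_details when caching is reported
```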
@@ -1435,3 +1439,59 @@ def _lc_invalid_tool_call_to_groq_tool_call(
             "arguments": invalid_tool_call["args"],
         },
     }
+
+
+def _create_usage_metadata(groq_token_usage: dict) -> UsageMetadata:
+    """Create usage metadata from a Groq token usage response.
+
+    Args:
+        groq_token_usage: Token usage dict from the Groq API response.
+
+    Returns:
+        Usage metadata dict with input/output token details.
+    """
+    # Support both formats: the newer Responses API uses "input_tokens",
+    # the Chat Completions API uses "prompt_tokens"
+    input_tokens = (
+        groq_token_usage.get("input_tokens")
+        or groq_token_usage.get("prompt_tokens")
+        or 0
+    )
+    output_tokens = (
+        groq_token_usage.get("output_tokens")
+        or groq_token_usage.get("completion_tokens")
+        or 0
+    )
+    total_tokens = groq_token_usage.get("total_tokens") or input_tokens + output_tokens
+
+    # Support both formats for token details:
+    # the Responses API uses "*_tokens_details", the Chat Completions API may use
+    # "prompt_tokens_details"
+    input_details_dict = (
+        groq_token_usage.get("input_tokens_details")
+        or groq_token_usage.get("prompt_tokens_details")
+        or {}
+    )
+    output_details_dict = (
+        groq_token_usage.get("output_tokens_details")
+        or groq_token_usage.get("completion_tokens_details")
+        or {}
+    )
+
+    input_token_details: dict = {
+        "cache_read": input_details_dict.get("cached_tokens"),
+    }
+    output_token_details: dict = {
+        "reasoning": output_details_dict.get("reasoning_tokens"),
+    }
+    usage_metadata: UsageMetadata = {
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "total_tokens": total_tokens,
+    }
+
+    if filtered_input := {k: v for k, v in input_token_details.items() if v}:
+        usage_metadata["input_token_details"] = InputTokenDetails(**filtered_input)  # type: ignore[typeddict-item]
+    if filtered_output := {k: v for k, v in output_token_details.items() if v}:
+        usage_metadata["output_token_details"] = OutputTokenDetails(**filtered_output)  # type: ignore[typeddict-item]
+    return usage_metadata
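To make the helper's mapping concrete, a hypothetical Chat Completions payload with caching enabled (values invented for the example):

```python
token_usage = {
    "prompt_tokens": 2006,
    "completion_tokens": 300,
    "total_tokens": 2306,
    "prompt_tokens_details": {"cached_tokens": 1920},
}
print(_create_usage_metadata(token_usage))
# {'input_tokens': 2006, 'output_tokens': 300, 'total_tokens': 2306,
#  'input_token_details': {'cache_read': 1920}}
```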
