From 4aba3fadd60064c02855543d5751c77af110bcec Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Mon, 15 Sep 2025 20:19:31 +0900 Subject: [PATCH] Document LiteLLM usage tracking --- docs/models/litellm.md | 17 +++++++++++++++++ docs/usage.md | 18 ++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/docs/models/litellm.md b/docs/models/litellm.md index 90572a28c..08263feef 100644 --- a/docs/models/litellm.md +++ b/docs/models/litellm.md @@ -71,3 +71,20 @@ if __name__ == "__main__": asyncio.run(main(model, api_key)) ``` + +## Tracking usage data + +If you want LiteLLM responses to populate the Agents SDK usage metrics, pass `ModelSettings(include_usage=True)` when creating your agent. + +```python +from agents import Agent, ModelSettings +from agents.extensions.models.litellm_model import LitellmModel + +agent = Agent( + name="Assistant", + model=LitellmModel(model="your/model", api_key="..."), + model_settings=ModelSettings(include_usage=True), +) +``` + +With `include_usage=True`, LiteLLM requests report token and request counts through `result.context_wrapper.usage` just like the built-in OpenAI models. diff --git a/docs/usage.md b/docs/usage.md index 4f0a66309..a18f86ae3 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -28,6 +28,24 @@ print("Total tokens:", usage.total_tokens) Usage is aggregated across all model calls during the run (including tool calls and handoffs). +### Enabling usage with LiteLLM models + +LiteLLM providers do not report usage metrics by default. When you are using [`LitellmModel`](models/litellm.md), pass `ModelSettings(include_usage=True)` to your agent so that LiteLLM responses populate `result.context_wrapper.usage`. + +```python +from agents import Agent, ModelSettings, Runner +from agents.extensions.models.litellm_model import LitellmModel + +agent = Agent( + name="Assistant", + model=LitellmModel(model="your/model", api_key="..."), + model_settings=ModelSettings(include_usage=True), +) + +result = await Runner.run(agent, "What's the weather in Tokyo?") +print(result.context_wrapper.usage.total_tokens) +``` + ## Accessing usage with sessions When you use a `Session` (e.g., `SQLiteSession`), each call to `Runner.run(...)` returns usage for that specific run. Sessions maintain conversation history for context, but each run's usage is independent.