From 4aba3fadd60064c02855543d5751c77af110bcec Mon Sep 17 00:00:00 2001
From: Kazuhiro Sera <seratch@openai.com>
Date: Mon, 15 Sep 2025 20:19:31 +0900
Subject: [PATCH] Document LiteLLM usage tracking

---
 docs/models/litellm.md | 17 +++++++++++++++++
 docs/usage.md          | 18 ++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/docs/models/litellm.md b/docs/models/litellm.md
index 90572a28c..08263feef 100644
--- a/docs/models/litellm.md
+++ b/docs/models/litellm.md
@@ -71,3 +71,20 @@ if __name__ == "__main__":
 
     asyncio.run(main(model, api_key))
 ```
+
+## Tracking usage data
+
+If you want LiteLLM responses to populate the Agents SDK usage metrics, pass `ModelSettings(include_usage=True)` when creating your agent.
+
+```python
+from agents import Agent, ModelSettings
+from agents.extensions.models.litellm_model import LitellmModel
+
+agent = Agent(
+    name="Assistant",
+    model=LitellmModel(model="your/model", api_key="..."),
+    model_settings=ModelSettings(include_usage=True),
+)
+```
+
+With `include_usage=True`, LiteLLM responses report token and request counts through `result.context_wrapper.usage`, just like the built-in OpenAI models.
diff --git a/docs/usage.md b/docs/usage.md
index 4f0a66309..a18f86ae3 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -28,6 +28,24 @@ print("Total tokens:", usage.total_tokens)
 
 Usage is aggregated across all model calls during the run (including tool calls and handoffs).
 
+### Enabling usage with LiteLLM models
+
+LiteLLM providers do not report usage metrics by default. When you are using [`LitellmModel`](models/litellm.md), pass `ModelSettings(include_usage=True)` to your agent so that LiteLLM responses populate `result.context_wrapper.usage`.
+
+```python
+from agents import Agent, ModelSettings, Runner
+from agents.extensions.models.litellm_model import LitellmModel
+
+agent = Agent(
+    name="Assistant",
+    model=LitellmModel(model="your/model", api_key="..."),
+    model_settings=ModelSettings(include_usage=True),
+)
+
+result = await Runner.run(agent, "What's the weather in Tokyo?")
+print(result.context_wrapper.usage.total_tokens)
+```
+
 ## Accessing usage with sessions
 
 When you use a `Session` (e.g., `SQLiteSession`), each call to `Runner.run(...)` returns usage for that specific run. Sessions maintain conversation history for context, but each run's usage is independent.