jayeshp19 · pull · Sep 15, 2025 · Sep 15, 2025 · Sep 15, 2025
diff --git a/docs/models/litellm.md b/docs/models/litellm.md
@@ -71,3 +71,20 @@ if __name__ == "__main__":
 
     asyncio.run(main(model, api_key))
 ```
+
+## Tracking usage data
+
+If you want LiteLLM responses to populate the Agents SDK usage metrics, pass `ModelSettings(include_usage=True)` when creating your agent.
+
+```python
+from agents import Agent, ModelSettings
+from agents.extensions.models.litellm_model import LitellmModel
+
+agent = Agent(
+    name="Assistant",
+    model=LitellmModel(model="your/model", api_key="..."),
+    model_settings=ModelSettings(include_usage=True),
+)
+```
+
+With `include_usage=True`, LiteLLM requests report token and request counts through `result.context_wrapper.usage` just like the built-in OpenAI models.
diff --git a/docs/usage.md b/docs/usage.md
@@ -28,6 +28,24 @@ print("Total tokens:", usage.total_tokens)
 
 Usage is aggregated across all model calls during the run (including tool calls and handoffs).
 
+### Enabling usage with LiteLLM models
+
+LiteLLM providers do not report usage metrics by default. When you are using [`LitellmModel`](models/litellm.md), pass `ModelSettings(include_usage=True)` to your agent so that LiteLLM responses populate `result.context_wrapper.usage`.
+
+```python
+from agents import Agent, ModelSettings, Runner
+from agents.extensions.models.litellm_model import LitellmModel
+
+agent = Agent(
+    name="Assistant",
+    model=LitellmModel(model="your/model", api_key="..."),
+    model_settings=ModelSettings(include_usage=True),
+)
+
+result = await Runner.run(agent, "What's the weather in Tokyo?")
+print(result.context_wrapper.usage.total_tokens)
+```
+
 ## Accessing usage with sessions
 
 When you use a `Session` (e.g., `SQLiteSession`), each call to `Runner.run(...)` returns usage for that specific run. Sessions maintain conversation history for context, but each run's usage is independent.

diff --git a/src/agents/run.py b/src/agents/run.py
@@ -272,6 +272,7 @@ async def run(
                 We recommend only using this if you are exclusively using OpenAI models;
                 other model providers don't write to the Conversation object,
                 so you'll end up having partial conversations stored.
+            session: A session for automatic conversation history management.
         Returns:
             A run result containing all the inputs, guardrail results and the output of the last
             agent. Agents may perform handoffs, so we don't know the specific type of the output.
@@ -329,6 +330,7 @@ def run_sync(
             previous_response_id: The ID of the previous response, if using OpenAI models via the
                 Responses API, this allows you to skip passing in input from the previous turn.
             conversation_id: The ID of the stored conversation, if any.
+            session: A session for automatic conversation history management.
         Returns:
             A run result containing all the inputs, guardrail results and the output of the last
             agent. Agents may perform handoffs, so we don't know the specific type of the output.
@@ -383,6 +385,7 @@ def run_streamed(
             previous_response_id: The ID of the previous response, if using OpenAI models via the
                 Responses API, this allows you to skip passing in input from the previous turn.
             conversation_id: The ID of the stored conversation, if any.
+            session: A session for automatic conversation history management.
         Returns:
             A result object that contains data about the run, as well as a method to stream events.
         """