BerriAI
diff --git a/‎docs/my-website/docs/completion/shared_session.md‎
Lines changed: 213 additions & 0 deletions b/‎docs/my-website/docs/completion/shared_session.md‎
Lines changed: 213 additions & 0 deletions
diff --git a/‎litellm/llms/azure/chat/o_series_handler.py‎
Lines changed: 6 additions & 1 deletion b/‎litellm/llms/azure/chat/o_series_handler.py‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎litellm/llms/custom_httpx/http_handler.py‎
Lines changed: 27 additions & 4 deletions b/‎litellm/llms/custom_httpx/http_handler.py‎
Lines changed: 27 additions & 4 deletions
@@ -0,0 +1,213 @@
+# Shared Session Support
+
+## Overview
+
+LiteLLM now supports sharing `aiohttp.ClientSession` instances across multiple API calls to avoid creating unnecessary new sessions. This improves performance and resource utilization.
+
+## Usage
+
+### Basic Usage
+
+```python
+import asyncio
+from aiohttp import ClientSession
+from litellm import acompletion
+
+async def main():
+    # Create a shared session
+    async with ClientSession() as shared_session:
+        # Use the same session for multiple calls
+        response1 = await acompletion(
+            model="gpt-4o",
+            messages=[{"role": "user", "content": "Hello"}],
+            shared_session=shared_session
+        )
+        
+        response2 = await acompletion(
+            model="gpt-4o", 
+            messages=[{"role": "user", "content": "How are you?"}],
+            shared_session=shared_session
+        )
+        
+        # Both calls reuse the same session!
+
+asyncio.run(main())
+```
+
+### Without Shared Session (Default)
+
+```python
+import asyncio
+from litellm import acompletion
+
+async def main():
+    # No shared session is passed, so LiteLLM creates and manages its own HTTP session internally
+    response1 = await acompletion(
+        model="gpt-4o",
+        messages=[{"role": "user", "content": "Hello"}]
+    )
+    
+    response2 = await acompletion(
+        model="gpt-4o",
+        messages=[{"role": "user", "content": "How are you?"}]
+    )
+    # The caller has no control over the lifecycle of the sessions used here
+
+asyncio.run(main())
+```
+
+## Benefits
+
+- **Performance**: Reuse HTTP connections across multiple calls
+- **Resource Efficiency**: Reduce memory and connection overhead
+- **Better Control**: Manage session lifecycle explicitly
+- **Debugging**: Easy to trace which calls use which sessions
+
+## Debug Logging
+
+Enable debug logging to see session reuse in action:
+
+```python
+import os
+import litellm
+
+# Enable debug logging
+os.environ['LITELLM_LOG'] = 'DEBUG'
+
+# You'll see logs like:
+# 🔄 SHARED SESSION: acompletion called with shared_session (ID: 12345)
+# SHARED SESSION: Reusing existing ClientSession (ID: 12345)
+```
+
+## Common Patterns
+
+### FastAPI Integration
+
+```python
+from fastapi import FastAPI
+import aiohttp
+import litellm
+
+app = FastAPI()
+
+@app.post("/chat")
+async def chat(messages: list[dict]):
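+    # Create one session per request. To reuse connections across requests,
+    # create the session once at application startup and share it instead.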
+    async with aiohttp.ClientSession() as session:
+        return await litellm.acompletion(
+            model="gpt-4o",
+            messages=messages,
+            shared_session=session
+        )
+```
+
+### Batch Processing
+
+```python
+import asyncio
+from aiohttp import ClientSession
+from litellm import acompletion
+
+async def process_batch(messages_list):
+    async with ClientSession() as shared_session:
+        tasks = []
+        for messages in messages_list:
+            task = acompletion(
+                model="gpt-4o",
+                messages=messages,
+                shared_session=shared_session
+            )
+            tasks.append(task)
+        
+        # All tasks use the same session
+        results = await asyncio.gather(*tasks)
+        return results
+```
+
+### Custom Session Configuration
+
+```python
+import aiohttp
+import litellm
+
+# Create an optimized session.
+# NOTE: `async with` must run inside an `async def` function, not at module level.
+async with aiohttp.ClientSession(
+    timeout=aiohttp.ClientTimeout(total=180),
+    connector=aiohttp.TCPConnector(limit=300, limit_per_host=75)
+) as shared_session:
+    
+    response = await litellm.acompletion(
+        model="gpt-4o",
+        messages=[{"role": "user", "content": "Hello"}],
+        shared_session=shared_session
+    )
+```
+
+## Implementation Details
+
+The `shared_session` parameter is threaded through the entire LiteLLM call chain:
+
+1. **`acompletion()`** - Accepts `shared_session` parameter
+2. **`BaseLLMHTTPHandler`** - Passes session to HTTP client creation
+3. **`AsyncHTTPHandler`** - Uses existing session if provided
+4. **`LiteLLMAiohttpTransport`** - Reuses the session for HTTP requests
+
+## Backward Compatibility
+
+- **100% backward compatible** - Existing code works unchanged
+- **Optional parameter** - `shared_session=None` by default
+- **No breaking changes** - All existing functionality preserved
+
+## Testing
+
+Test the shared session functionality:
+
+```python
+import asyncio
+from aiohttp import ClientSession
+from litellm import acompletion
+
+async def test_shared_session():
+    async with ClientSession() as session:
+        print(f"✅ Created session: {id(session)}")
+        
+        try:
+            response = await acompletion(
+                model="gpt-4o",
+                messages=[{"role": "user", "content": "Hello"}],
+                shared_session=session,
+                api_key="your-api-key"
+            )
+            print(f"Response: {response.choices[0].message.content}")
+        except Exception as e:
+            print(f"✅ Expected error: {type(e).__name__}")
+        
+        print("✅ Session control working!")
+
+asyncio.run(test_shared_session())
+```
+
+## Files Modified
+
+The shared session functionality was added to these files:
+
+- `litellm/main.py` - Added `shared_session` parameter to `acompletion()` and `completion()`
+- `litellm/llms/custom_httpx/http_handler.py` - Core session reuse logic
+- `litellm/llms/custom_httpx/llm_http_handler.py` - HTTP handler integration
+- `litellm/llms/openai/openai.py` - OpenAI provider integration
+- `litellm/llms/openai/common_utils.py` - OpenAI client creation
+- `litellm/llms/azure/chat/o_series_handler.py` - Azure O Series handler
+
+## Troubleshooting
+
+### Session Not Being Reused
+
+1. **Check debug logs**: Enable `LITELLM_LOG=DEBUG` to see session reuse messages
+2. **Verify the session is not closed**: Ensure the session is still open when making calls; a closed session is ignored and a new `ClientSession` is created instead
+3. **Check parameter passing**: Make sure `shared_session` is passed to all `acompletion()` calls
+
+### Performance Issues
+
+1. **Session configuration**: Tune `aiohttp.ClientSession` parameters for your use case
+2. **Connection limits**: Adjust `limit` and `limit_per_host` in `TCPConnector`
+3. **Timeout settings**: Configure appropriate timeouts for your environment
@@ -4,7 +4,7 @@
 Written separately to handle faking streaming for o1 and o3 models.
 """
 
-from typing import Any, Callable, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union
 
 import httpx
 
@@ -13,6 +13,9 @@
 from ...openai.openai import OpenAIChatCompletion
 from ..common_utils import BaseAzureLLM
 
+if TYPE_CHECKING:
+    from aiohttp import ClientSession
+
 
 class AzureOpenAIO1ChatCompletion(BaseAzureLLM, OpenAIChatCompletion):
     def completion(
@@ -38,6 +41,7 @@ def completion(
         organization: Optional[str] = None,
         custom_llm_provider: Optional[str] = None,
         drop_params: Optional[bool] = None,
+        shared_session: Optional["ClientSession"] = None,
     ):
         client = self.get_azure_openai_client(
             litellm_params=litellm_params,
@@ -69,4 +73,5 @@ def completion(
             organization=organization,
             custom_llm_provider=custom_llm_provider,
             drop_params=drop_params,
+            shared_session=shared_session,
         )
@@ -167,6 +167,7 @@ def __init__(
         concurrent_limit=1000,
         client_alias: Optional[str] = None,  # name for client in logs
         ssl_verify: Optional[VerifyTypes] = None,
+        shared_session: Optional["ClientSession"] = None,
     ):
         self.timeout = timeout
         self.event_hooks = event_hooks
@@ -175,6 +176,7 @@ def __init__(
             concurrent_limit=concurrent_limit,
             event_hooks=event_hooks,
             ssl_verify=ssl_verify,
+            shared_session=shared_session,
         )
         self.client_alias = client_alias
 
@@ -184,6 +186,7 @@ def create_client(
         concurrent_limit: int,
         event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]],
         ssl_verify: Optional[VerifyTypes] = None,
+        shared_session: Optional["ClientSession"] = None,
     ) -> httpx.AsyncClient:
         # Get unified SSL configuration
         ssl_config = get_ssl_configuration(ssl_verify)
@@ -199,6 +202,7 @@ def create_client(
         transport = AsyncHTTPHandler._create_async_transport(
             ssl_context=ssl_config if isinstance(ssl_config, ssl.SSLContext) else None,
             ssl_verify=ssl_config if isinstance(ssl_config, bool) else None,
+            shared_session=shared_session,
         )
 
         return httpx.AsyncClient(
@@ -260,7 +264,6 @@ async def post(
         files: Optional[RequestFiles] = None,
         content: Any = None,
     ):
-
         start_time = time.time()
         try:
             if timeout is None:
@@ -523,7 +526,9 @@ def __del__(self) -> None:
 
     @staticmethod
     def _create_async_transport(
-        ssl_context: Optional[ssl.SSLContext] = None, ssl_verify: Optional[bool] = None
+        ssl_context: Optional[ssl.SSLContext] = None,
+        ssl_verify: Optional[bool] = None,
+        shared_session: Optional["ClientSession"] = None,
     ) -> Optional[Union[LiteLLMAiohttpTransport, AsyncHTTPTransport]]:
         """
         - Creates a transport for httpx.AsyncClient
@@ -544,7 +549,9 @@ def _create_async_transport(
         #########################################################
         if AsyncHTTPHandler._should_use_aiohttp_transport():
             return AsyncHTTPHandler._create_aiohttp_transport(
-                ssl_context=ssl_context, ssl_verify=ssl_verify
+                ssl_context=ssl_context,
+                ssl_verify=ssl_verify,
+                shared_session=shared_session,
             )
 
         #########################################################
@@ -612,6 +619,7 @@ def _get_ssl_connector_kwargs(
     def _create_aiohttp_transport(
         ssl_verify: Optional[bool] = None,
         ssl_context: Optional[ssl.SSLContext] = None,
+        shared_session: Optional["ClientSession"] = None,
     ) -> LiteLLMAiohttpTransport:
         """
         Creates an AiohttpTransport with RequestNotRead error handling
@@ -635,6 +643,18 @@ def _create_aiohttp_transport(
             trust_env = True
 
         verbose_logger.debug("Creating AiohttpTransport...")
+
+        # Use shared session if provided and valid
+        if shared_session is not None and not shared_session.closed:
+            verbose_logger.debug(
+                f"SHARED SESSION: Reusing existing ClientSession (ID: {id(shared_session)})"
+            )
+            return LiteLLMAiohttpTransport(client=shared_session)
+
+        # Create new session only if none provided or existing one is invalid
+        verbose_logger.debug(
+            "NEW SESSION: Creating new ClientSession (no shared session provided)"
+        )
         return LiteLLMAiohttpTransport(
             client=lambda: ClientSession(
                 connector=TCPConnector(**connector_kwargs),
@@ -921,6 +941,7 @@ def _create_sync_transport(self) -> Optional[HTTPTransport]:
 def get_async_httpx_client(
     llm_provider: Union[LlmProviders, httpxSpecialProvider],
     params: Optional[dict] = None,
+    shared_session: Optional["ClientSession"] = None,
 ) -> AsyncHTTPHandler:
     """
     Retrieves the async HTTP client from the cache
@@ -942,10 +963,12 @@ def get_async_httpx_client(
         return _cached_client
 
     if params is not None:
+        params["shared_session"] = shared_session
         _new_client = AsyncHTTPHandler(**params)
     else:
         _new_client = AsyncHTTPHandler(
-            timeout=httpx.Timeout(timeout=600.0, connect=5.0)
+            timeout=httpx.Timeout(timeout=600.0, connect=5.0),
+            shared_session=shared_session,
         )
 
     litellm.in_memory_llm_clients_cache.set_cache(