
Commit 64a305f

Merge pull request #14721 from dharamendrak/reuse-aiohttp-session-http-handler
feat: Add shared_session parameter for aiohttp ClientSession reuse
2 parents d5ced7e + 34f51a2

File tree

9 files changed: +744 -28 lines changed
Lines changed: 213 additions & 0 deletions (new file)
# Shared Session Support

## Overview

LiteLLM now supports sharing `aiohttp.ClientSession` instances across multiple API calls to avoid creating unnecessary new sessions. This improves performance and resource utilization.

## Usage

### Basic Usage

```python
import asyncio
from aiohttp import ClientSession
from litellm import acompletion

async def main():
    # Create a shared session
    async with ClientSession() as shared_session:
        # Use the same session for multiple calls
        response1 = await acompletion(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hello"}],
            shared_session=shared_session
        )

        response2 = await acompletion(
            model="gpt-4o",
            messages=[{"role": "user", "content": "How are you?"}],
            shared_session=shared_session
        )

        # Both calls reuse the same session!

asyncio.run(main())
```
### Without Shared Session (Default)

```python
import asyncio
from litellm import acompletion

async def main():
    # Each call creates a new session
    response1 = await acompletion(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}]
    )

    response2 = await acompletion(
        model="gpt-4o",
        messages=[{"role": "user", "content": "How are you?"}]
    )
    # Two separate sessions created

asyncio.run(main())
```
## Benefits

- **Performance**: Reuse HTTP connections across multiple calls
- **Resource Efficiency**: Reduce memory and connection overhead
- **Better Control**: Manage the session lifecycle explicitly
- **Debugging**: Easy to trace which calls use which sessions
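
To gauge the effect on your own workload, a rough timing harness like the sketch below can help. It is illustrative only: it assumes valid API credentials, and model latency will usually dominate, so expect the gap to show up mainly across many small, fast calls.

```python
import asyncio
import time
from aiohttp import ClientSession
from litellm import acompletion

async def timed_calls(n: int, shared_session=None) -> float:
    # Fire n sequential calls and report wall-clock time
    start = time.perf_counter()
    for _ in range(n):
        await acompletion(
            model="gpt-4o",
            messages=[{"role": "user", "content": "ping"}],
            shared_session=shared_session
        )
    return time.perf_counter() - start

async def main():
    # Default behavior: a fresh session per call
    t_fresh = await timed_calls(5)
    # One session reused across all calls
    async with ClientSession() as session:
        t_shared = await timed_calls(5, shared_session=session)
    print(f"fresh: {t_fresh:.2f}s  shared: {t_shared:.2f}s")

asyncio.run(main())
```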
## Debug Logging

Enable debug logging to see session reuse in action:

```python
import os
import litellm

# Enable debug logging
os.environ['LITELLM_LOG'] = 'DEBUG'

# You'll see logs like:
# 🔄 SHARED SESSION: acompletion called with shared_session (ID: 12345)
# ✅ SHARED SESSION: Reusing existing ClientSession (ID: 12345)
```
## Common Patterns

### FastAPI Integration

```python
from fastapi import FastAPI
import aiohttp
import litellm

app = FastAPI()

@app.post("/chat")
async def chat(messages: list[dict]):
    # Create a session per request
    async with aiohttp.ClientSession() as session:
        return await litellm.acompletion(
            model="gpt-4o",
            messages=messages,
            shared_session=session
        )
```
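
A per-request session keeps lifecycles simple but gives up connection reuse across requests. If you want one session for the application's whole lifetime, a sketch using FastAPI's standard `lifespan` hook follows; the pattern is our suggestion, not part of this PR.

```python
from contextlib import asynccontextmanager

import aiohttp
import litellm
from fastapi import FastAPI, Request

@asynccontextmanager
async def lifespan(app: FastAPI):
    # One session for the app's whole lifetime
    app.state.session = aiohttp.ClientSession()
    yield
    await app.state.session.close()

app = FastAPI(lifespan=lifespan)

@app.post("/chat")
async def chat(messages: list[dict], request: Request):
    # Every request reuses the app-scoped session
    return await litellm.acompletion(
        model="gpt-4o",
        messages=messages,
        shared_session=request.app.state.session
    )
```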
### Batch Processing

```python
import asyncio
from aiohttp import ClientSession
from litellm import acompletion

async def process_batch(messages_list):
    async with ClientSession() as shared_session:
        tasks = []
        for messages in messages_list:
            task = acompletion(
                model="gpt-4o",
                messages=messages,
                shared_session=shared_session
            )
            tasks.append(task)

        # All tasks use the same session
        results = await asyncio.gather(*tasks)
        return results
```
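
With large batches, an unbounded `gather` can start more concurrent requests than the session's connector allows, so the excess queues inside aiohttp. One option is to cap concurrency yourself; a sketch, with the cap of 20 as an arbitrary example chosen to stay under the connector's default `limit` of 100:

```python
import asyncio
from aiohttp import ClientSession
from litellm import acompletion

async def process_batch_bounded(messages_list, max_concurrency: int = 20):
    semaphore = asyncio.Semaphore(max_concurrency)

    async def one_call(messages, session):
        # The semaphore caps how many calls are in flight at once
        async with semaphore:
            return await acompletion(
                model="gpt-4o",
                messages=messages,
                shared_session=session
            )

    async with ClientSession() as shared_session:
        return await asyncio.gather(
            *(one_call(m, shared_session) for m in messages_list)
        )
```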
### Custom Session Configuration

```python
import asyncio
import aiohttp
import litellm

async def main():
    # Create an optimized session
    async with aiohttp.ClientSession(
        timeout=aiohttp.ClientTimeout(total=180),
        connector=aiohttp.TCPConnector(limit=300, limit_per_host=75)
    ) as shared_session:
        response = await litellm.acompletion(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hello"}],
            shared_session=shared_session
        )

asyncio.run(main())
```
## Implementation Details

The `shared_session` parameter is threaded through the entire LiteLLM call chain (a simplified sketch follows the list):

1. **`acompletion()`** - Accepts the `shared_session` parameter
2. **`BaseLLMHTTPHandler`** - Passes the session to HTTP client creation
3. **`AsyncHTTPHandler`** - Uses the existing session if provided
4. **`LiteLLMAiohttpTransport`** - Reuses the session for HTTP requests
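
For orientation, a toy sketch of the decision at the bottom of that chain, mirroring the `_create_aiohttp_transport` hunk shown later in this commit (the class here is a stand-in; the real transport does much more):

```python
from typing import Callable, Optional, Union
from aiohttp import ClientSession

class ToyAiohttpTransport:
    # Stand-in for LiteLLMAiohttpTransport: holds either a live
    # session or a factory that can create one on demand
    def __init__(self, client: Union[ClientSession, Callable[[], ClientSession]]):
        self.client = client

def create_transport(shared_session: Optional[ClientSession] = None) -> ToyAiohttpTransport:
    # Reuse the caller's session when it is provided and still open...
    if shared_session is not None and not shared_session.closed:
        return ToyAiohttpTransport(client=shared_session)
    # ...otherwise hand over a factory for a fresh session
    return ToyAiohttpTransport(client=lambda: ClientSession())
```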
## Backward Compatibility

- **100% backward compatible** - Existing code works unchanged
- **Optional parameter** - `shared_session=None` by default
- **No breaking changes** - All existing functionality preserved
## Testing

Test the shared session functionality:

```python
import asyncio
from aiohttp import ClientSession
from litellm import acompletion

async def test_shared_session():
    async with ClientSession() as session:
        print(f"✅ Created session: {id(session)}")

        try:
            response = await acompletion(
                model="gpt-4o",
                messages=[{"role": "user", "content": "Hello"}],
                shared_session=session,
                api_key="your-api-key"
            )
            print(f"Response: {response.choices[0].message.content}")
        except Exception as e:
            print(f"✅ Expected error: {type(e).__name__}")

        print("✅ Session control working!")

asyncio.run(test_shared_session())
```
## Files Modified

The shared session functionality was added to these files:

- `litellm/main.py` - Added the `shared_session` parameter to `acompletion()` and `completion()`
- `litellm/llms/custom_httpx/http_handler.py` - Core session reuse logic
- `litellm/llms/custom_httpx/llm_http_handler.py` - HTTP handler integration
- `litellm/llms/openai/openai.py` - OpenAI provider integration
- `litellm/llms/openai/common_utils.py` - OpenAI client creation
- `litellm/llms/azure/chat/o_series_handler.py` - Azure O Series handler
## Troubleshooting

### Session Not Being Reused

1. **Check debug logs**: Enable `LITELLM_LOG=DEBUG` to see session reuse messages
2. **Verify the session is not closed**: Ensure the session is still active when making calls (see the guard sketch below)
3. **Check parameter passing**: Make sure `shared_session` is passed to every `acompletion()` call
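
A quick way to verify point 2 before making calls, as a minimal sketch (`ClientSession.closed` is a real aiohttp attribute; the helper itself is ours). A session that has exited its `async with` block is closed, and per the transport logic in this commit a closed session is silently replaced with a fresh one, defeating reuse:

```python
from aiohttp import ClientSession

def assert_session_open(session: ClientSession) -> None:
    # Raise early instead of silently falling back to a new session
    if session.closed:
        raise RuntimeError(
            f"Session {id(session)} is closed; create it in a scope "
            "that outlives every acompletion() call that uses it."
        )
```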
### Performance Issues

1. **Session configuration**: Tune the `aiohttp.ClientSession` parameters for your use case (an example follows)
2. **Connection limits**: Adjust `limit` and `limit_per_host` on the `TCPConnector`
3. **Timeout settings**: Configure timeouts appropriate for your environment
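
For example, a session tuned for long-running LLM calls might keep the connect timeout short while giving slow generations time to finish, and hold idle connections open longer. The values below are illustrative, not recommendations:

```python
import aiohttp

async def make_tuned_session() -> aiohttp.ClientSession:
    # Create the session inside a running event loop, as aiohttp recommends
    return aiohttp.ClientSession(
        # Fail fast on connect, but give generations time to stream
        timeout=aiohttp.ClientTimeout(total=600, connect=5),
        connector=aiohttp.TCPConnector(
            limit=100,             # total concurrent connections
            limit_per_host=25,     # per provider endpoint
            keepalive_timeout=60,  # keep idle connections warm longer
        ),
    )
```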

litellm/llms/azure/chat/o_series_handler.py

Lines changed: 6 additions & 1 deletion

```diff
@@ -4,7 +4,7 @@
 Written separately to handle faking streaming for o1 and o3 models.
 """

-from typing import Any, Callable, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union

 import httpx

@@ -13,6 +13,9 @@
 from ...openai.openai import OpenAIChatCompletion
 from ..common_utils import BaseAzureLLM

+if TYPE_CHECKING:
+    from aiohttp import ClientSession
+

 class AzureOpenAIO1ChatCompletion(BaseAzureLLM, OpenAIChatCompletion):
     def completion(
@@ -38,6 +41,7 @@ def completion(
         organization: Optional[str] = None,
         custom_llm_provider: Optional[str] = None,
         drop_params: Optional[bool] = None,
+        shared_session: Optional["ClientSession"] = None,
     ):
         client = self.get_azure_openai_client(
             litellm_params=litellm_params,
@@ -69,4 +73,5 @@ def completion(
             organization=organization,
             custom_llm_provider=custom_llm_provider,
             drop_params=drop_params,
+            shared_session=shared_session,
         )
```

litellm/llms/custom_httpx/http_handler.py

Lines changed: 27 additions & 4 deletions

```diff
@@ -167,6 +167,7 @@ def __init__(
         concurrent_limit=1000,
         client_alias: Optional[str] = None,  # name for client in logs
         ssl_verify: Optional[VerifyTypes] = None,
+        shared_session: Optional["ClientSession"] = None,
     ):
         self.timeout = timeout
         self.event_hooks = event_hooks
@@ -175,6 +176,7 @@ def __init__(
             concurrent_limit=concurrent_limit,
             event_hooks=event_hooks,
             ssl_verify=ssl_verify,
+            shared_session=shared_session,
         )
         self.client_alias = client_alias

@@ -184,6 +186,7 @@ def create_client(
         concurrent_limit: int,
         event_hooks: Optional[Mapping[str, List[Callable[..., Any]]]],
         ssl_verify: Optional[VerifyTypes] = None,
+        shared_session: Optional["ClientSession"] = None,
     ) -> httpx.AsyncClient:
         # Get unified SSL configuration
         ssl_config = get_ssl_configuration(ssl_verify)
@@ -199,6 +202,7 @@
         transport = AsyncHTTPHandler._create_async_transport(
             ssl_context=ssl_config if isinstance(ssl_config, ssl.SSLContext) else None,
             ssl_verify=ssl_config if isinstance(ssl_config, bool) else None,
+            shared_session=shared_session,
         )

         return httpx.AsyncClient(
@@ -260,7 +264,6 @@ async def post(
         files: Optional[RequestFiles] = None,
         content: Any = None,
     ):
-
         start_time = time.time()
         try:
             if timeout is None:
@@ -523,7 +526,9 @@ def __del__(self) -> None:

     @staticmethod
     def _create_async_transport(
-        ssl_context: Optional[ssl.SSLContext] = None, ssl_verify: Optional[bool] = None
+        ssl_context: Optional[ssl.SSLContext] = None,
+        ssl_verify: Optional[bool] = None,
+        shared_session: Optional["ClientSession"] = None,
     ) -> Optional[Union[LiteLLMAiohttpTransport, AsyncHTTPTransport]]:
         """
         - Creates a transport for httpx.AsyncClient
@@ -544,7 +549,9 @@ def _create_async_transport(
         #########################################################
         if AsyncHTTPHandler._should_use_aiohttp_transport():
             return AsyncHTTPHandler._create_aiohttp_transport(
-                ssl_context=ssl_context, ssl_verify=ssl_verify
+                ssl_context=ssl_context,
+                ssl_verify=ssl_verify,
+                shared_session=shared_session,
             )

         #########################################################
@@ -612,6 +619,7 @@ def _get_ssl_connector_kwargs(
     def _create_aiohttp_transport(
         ssl_verify: Optional[bool] = None,
         ssl_context: Optional[ssl.SSLContext] = None,
+        shared_session: Optional["ClientSession"] = None,
     ) -> LiteLLMAiohttpTransport:
         """
         Creates an AiohttpTransport with RequestNotRead error handling
@@ -635,6 +643,18 @@
             trust_env = True

         verbose_logger.debug("Creating AiohttpTransport...")
+
+        # Use shared session if provided and valid
+        if shared_session is not None and not shared_session.closed:
+            verbose_logger.debug(
+                f"SHARED SESSION: Reusing existing ClientSession (ID: {id(shared_session)})"
+            )
+            return LiteLLMAiohttpTransport(client=shared_session)
+
+        # Create new session only if none provided or existing one is invalid
+        verbose_logger.debug(
+            "NEW SESSION: Creating new ClientSession (no shared session provided)"
+        )
         return LiteLLMAiohttpTransport(
             client=lambda: ClientSession(
                 connector=TCPConnector(**connector_kwargs),
@@ -921,6 +941,7 @@ def _create_sync_transport(self) -> Optional[HTTPTransport]:
 def get_async_httpx_client(
     llm_provider: Union[LlmProviders, httpxSpecialProvider],
     params: Optional[dict] = None,
+    shared_session: Optional["ClientSession"] = None,
 ) -> AsyncHTTPHandler:
     """
     Retrieves the async HTTP client from the cache
@@ -942,10 +963,12 @@
         return _cached_client

     if params is not None:
+        params["shared_session"] = shared_session
         _new_client = AsyncHTTPHandler(**params)
     else:
         _new_client = AsyncHTTPHandler(
-            timeout=httpx.Timeout(timeout=600.0, connect=5.0)
+            timeout=httpx.Timeout(timeout=600.0, connect=5.0),
+            shared_session=shared_session,
         )

     litellm.in_memory_llm_clients_cache.set_cache(
```
