
Commit 98d57b5

[Feat] Allow using x-litellm-stream-timeout header for stream timeout in requests (#14147)
* fix: allow passing stream_timeout header
* fix: _get_stream_timeout_from_request
* test_add_litellm_data_to_request_with_stream_timeout_header
* docs: LiteLLM Headers
1 parent 58ac3ec commit 98d57b5

File tree

4 files changed: +107 -0 lines changed

* docs/my-website/docs/proxy/request_headers.md
* litellm/proxy/_types.py
* litellm/proxy/litellm_pre_call_utils.py
* tests/test_litellm/proxy/test_common_request_processing.py

docs/my-website/docs/proxy/request_headers.md

Lines changed: 2 additions & 0 deletions
```diff
@@ -6,6 +6,8 @@ Special headers that are supported by LiteLLM.
 
 `x-litellm-timeout` Optional[float]: The timeout for the request in seconds.
 
+`x-litellm-stream-timeout` Optional[float]: The timeout for getting the first chunk of the response in seconds (only applies for streaming requests). [Demo Video](https://www.loom.com/share/8da67e4845ce431a98c901d4e45db0e5)
+
 `x-litellm-enable-message-redaction`: Optional[bool]: Don't log the message content to logging integrations. Just track spend. [Learn More](./logging#redact-messages-response-content)
 
 `x-litellm-tags`: Optional[str]: A comma separated list (e.g. `tag1,tag2,tag3`) of tags to use for [tag-based routing](./tag_routing) **OR** [spend-tracking](./enterprise.md#tracking-spend-for-custom-tags).
```
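For context, here is a minimal sketch of what sending this header from a client could look like. The `base_url` and `api_key` are placeholders (assuming a LiteLLM proxy listening at `http://localhost:4000`; neither is part of this commit), and `extra_headers` is the openai-python v1 mechanism for attaching custom HTTP headers to a single request:

```python
# Sketch only: base_url and api_key are placeholders for a local LiteLLM proxy.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

stream = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
    # Give up if the first streamed chunk takes longer than 10.5 seconds.
    extra_headers={"x-litellm-stream-timeout": "10.5"},
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
```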

litellm/proxy/_types.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -2904,6 +2904,7 @@ class LitellmDataForBackendLLMCall(TypedDict, total=False):
     headers: dict
     organization: str
     timeout: Optional[float]
+    stream_timeout: Optional[float]
     user: Optional[str]
     num_retries: Optional[int]
```
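Because `LitellmDataForBackendLLMCall` is declared with `total=False`, every field is optional and callers populate only what a given request supplies. A small illustrative sketch (the values are made up; the import path is taken from this commit's file tree):

```python
from litellm.proxy._types import LitellmDataForBackendLLMCall

# Illustrative values only: a request that sent both timeout headers would
# now carry stream_timeout alongside the existing timeout field.
data: LitellmDataForBackendLLMCall = {
    "timeout": 600.0,        # from x-litellm-timeout
    "stream_timeout": 30.0,  # from x-litellm-stream-timeout (new in this commit)
}
```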

litellm/proxy/litellm_pre_call_utils.py

Lines changed: 14 additions & 0 deletions
```diff
@@ -271,6 +271,16 @@ def _get_timeout_from_request(headers: dict) -> Optional[float]:
         if timeout_header is not None:
             return float(timeout_header)
         return None
+
+    @staticmethod
+    def _get_stream_timeout_from_request(headers: dict) -> Optional[float]:
+        """
+        Get the `stream_timeout` from the request headers.
+        """
+        stream_timeout_header = headers.get("x-litellm-stream-timeout", None)
+        if stream_timeout_header is not None:
+            return float(stream_timeout_header)
+        return None
 
     @staticmethod
     def _get_num_retries_from_request(headers: dict) -> Optional[int]:
@@ -439,6 +449,10 @@ def add_litellm_data_for_backend_llm_call(
         timeout = LiteLLMProxyRequestSetup._get_timeout_from_request(headers)
         if timeout is not None:
             data["timeout"] = timeout
+
+        stream_timeout = LiteLLMProxyRequestSetup._get_stream_timeout_from_request(headers)
+        if stream_timeout is not None:
+            data["stream_timeout"] = stream_timeout
 
         num_retries = LiteLLMProxyRequestSetup._get_num_retries_from_request(headers)
         if num_retries is not None:
```
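The docs describe `stream_timeout` as a bound on time-to-first-chunk, but note that this commit only extracts the header and forwards the value on `data`; enforcement presumably happens downstream in LiteLLM's routing/streaming layer, which this diff does not show. As a conceptual illustration only (not LiteLLM's actual implementation), a first-chunk deadline can be enforced by racing the first read of an async stream against a timer:

```python
import asyncio
from typing import AsyncIterator, Optional

async def first_chunk_with_timeout(
    stream: AsyncIterator[str], stream_timeout: Optional[float]
) -> str:
    # asyncio.wait_for raises asyncio.TimeoutError if the first chunk
    # does not arrive in time; None disables the deadline entirely.
    return await asyncio.wait_for(stream.__anext__(), timeout=stream_timeout)

async def _demo() -> None:
    async def slow_stream() -> AsyncIterator[str]:
        await asyncio.sleep(5)  # simulate a slow time-to-first-token
        yield "hello"

    try:
        await first_chunk_with_timeout(slow_stream(), stream_timeout=0.1)
    except asyncio.TimeoutError:
        print("no chunk within stream_timeout")

asyncio.run(_demo())
```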

tests/test_litellm/proxy/test_common_request_processing.py

Lines changed: 90 additions & 0 deletions
```diff
@@ -74,6 +74,96 @@ async def mock_common_processing_pre_call_logic(
             pytest.fail("litellm_call_id is not a valid UUID")
         assert data_passed["litellm_call_id"] == returned_data["litellm_call_id"]
 
+    @pytest.mark.asyncio
+    async def test_stream_timeout_header_processing(self):
+        """
+        Test that x-litellm-stream-timeout header gets processed and added to request data as stream_timeout.
+        """
+        from litellm.proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup
+
+        # Test with stream timeout header
+        headers_with_timeout = {"x-litellm-stream-timeout": "30.5"}
+        result = LiteLLMProxyRequestSetup._get_stream_timeout_from_request(headers_with_timeout)
+        assert result == 30.5
+
+        # Test without stream timeout header
+        headers_without_timeout = {}
+        result = LiteLLMProxyRequestSetup._get_stream_timeout_from_request(headers_without_timeout)
+        assert result is None
+
+        # Test with invalid header value (should raise ValueError when converting to float)
+        headers_with_invalid = {"x-litellm-stream-timeout": "invalid"}
+        with pytest.raises(ValueError):
+            LiteLLMProxyRequestSetup._get_stream_timeout_from_request(headers_with_invalid)
+
+    @pytest.mark.asyncio
+    async def test_add_litellm_data_to_request_with_stream_timeout_header(self):
+        """
+        Test that x-litellm-stream-timeout header gets processed and added to request data
+        when calling add_litellm_data_to_request.
+        """
+        from litellm.integrations.opentelemetry import UserAPIKeyAuth
+        from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
+
+        # Create test data with a basic completion request
+        test_data = {
+            "model": "gpt-3.5-turbo",
+            "messages": [{"role": "user", "content": "Hello"}]
+        }
+
+        # Mock request with stream timeout header
+        mock_request = MagicMock(spec=Request)
+        mock_request.headers = {"x-litellm-stream-timeout": "45.0"}
+        mock_request.url.path = "/v1/chat/completions"
+        mock_request.method = "POST"
+        mock_request.query_params = {}
+        mock_request.client = None
+
+        # Create a minimal mock with just the required attributes
+        mock_user_api_key_dict = MagicMock()
+        mock_user_api_key_dict.api_key = "test_api_key_hash"
+        mock_user_api_key_dict.tpm_limit = None
+        mock_user_api_key_dict.rpm_limit = None
+        mock_user_api_key_dict.max_budget = None
+        mock_user_api_key_dict.spend = 0
+        mock_user_api_key_dict.allowed_model_region = None
+        mock_user_api_key_dict.key_alias = None
+        mock_user_api_key_dict.user_id = None
+        mock_user_api_key_dict.team_id = None
+        mock_user_api_key_dict.metadata = {}  # Prevent enterprise feature check
+        mock_user_api_key_dict.team_metadata = None
+        mock_user_api_key_dict.org_id = None
+        mock_user_api_key_dict.team_alias = None
+        mock_user_api_key_dict.end_user_id = None
+        mock_user_api_key_dict.user_email = None
+        mock_user_api_key_dict.request_route = None
+        mock_user_api_key_dict.team_max_budget = None
+        mock_user_api_key_dict.team_spend = None
+        mock_user_api_key_dict.model_max_budget = None
+        mock_user_api_key_dict.parent_otel_span = None
+        mock_user_api_key_dict.team_model_aliases = None
+
+        general_settings = {}
+        mock_proxy_config = MagicMock()
+
+        # Call the actual function that processes headers and adds data
+        result_data = await add_litellm_data_to_request(
+            data=test_data,
+            request=mock_request,
+            general_settings=general_settings,
+            user_api_key_dict=mock_user_api_key_dict,
+            version=None,
+            proxy_config=mock_proxy_config,
+        )
+
+        # Verify that stream_timeout was extracted from header and added to request data
+        assert "stream_timeout" in result_data
+        assert result_data["stream_timeout"] == 45.0
+
+        # Verify that the original test data is preserved
+        assert result_data["model"] == "gpt-3.5-turbo"
+        assert result_data["messages"] == [{"role": "user", "content": "Hello"}]
+
 
 @pytest.mark.asyncio
 class TestCommonRequestProcessingHelpers:
```
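One behavior the tests pin down: a non-numeric header value makes `float()` raise `ValueError`, so the committed helper fails loudly rather than silently ignoring a malformed header. A hypothetical alternative (not part of this commit, shown only to highlight the design choice) would treat malformed values like missing ones:

```python
from typing import Optional

def _get_stream_timeout_lenient(headers: dict) -> Optional[float]:
    # Hypothetical variant: returns None for missing *or* malformed values
    # instead of raising ValueError the way the committed helper does.
    raw = headers.get("x-litellm-stream-timeout")
    if raw is None:
        return None
    try:
        return float(raw)
    except ValueError:
        return None
```

Failing loudly is arguably the better default here, since a silently dropped timeout would be hard to debug.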
