Skip to content

Commit 28edfda

Browse files
Fix streaming crash on Bedrock while preserving OpenAI usage stats (#64)
- Remove global default for `stream_options`, which caused crashes on non-OpenAI providers (e.g., Bedrock, Vertex) by routing requests to the wrong endpoint. - Implement smart logic in `_stream` and `_astream` to automatically add `stream_options={"include_usage": True}` *only* when an OpenAI/Azure model is detected. - Maintain backward compatibility for OpenAI users who rely on streaming usage stats. - Allow users to manually override `stream_options` for any provider. Fixes #51.
1 parent ed37959 commit 28edfda

File tree

1 file changed

+18
-6
lines changed

1 file changed

+18
-6
lines changed

langchain_litellm/chat_models/litellm.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@
8383
class ChatLiteLLMException(Exception):
    """Raised for errors originating in the `LiteLLM I/O` library."""
8585

86-
8786
def _create_retry_decorator(
8887
llm: ChatLiteLLM,
8988
run_manager: Optional[
@@ -337,9 +336,7 @@ class ChatLiteLLM(BaseChatModel):
337336
client: Any = None #: :meta private:
338337
model: str = "gpt-3.5-turbo"
339338
model_name: Optional[str] = None
340-
stream_options: Optional[Dict[str, Any]] = Field(
341-
default_factory=lambda: {"include_usage": True}
342-
)
339+
stream_options: Optional[Dict[str, Any]] = None
343340
"""Model name to use."""
344341
openai_api_key: Optional[str] = None
345342
azure_api_key: Optional[str] = None
@@ -566,6 +563,15 @@ def _create_message_dicts(
566563
params["stop"] = stop
567564
message_dicts = [_convert_message_to_dict(m) for m in messages]
568565
return message_dicts, params
566+
567+
def _is_openai(self) -> bool:
    """Return True when the active model targets OpenAI or Azure.

    Checks the explicit ``custom_llm_provider`` first, then falls back to
    inspecting the model identifier: an ``"azure"`` substring or an exact
    match against the known ``_OPENAI_MODELS`` set counts as OpenAI/Azure.
    """
    if self.custom_llm_provider in ("openai", "azure"):
        return True
    # Fall back to the model identifier when no provider was given.
    name = self.model_name or self.model or ""
    return "azure" in name or name in _OPENAI_MODELS
569575

570576
def _stream(
571577
self,
@@ -576,7 +582,10 @@ def _stream(
576582
) -> Iterator[ChatGenerationChunk]:
577583
message_dicts, params = self._create_message_dicts(messages, stop)
578584
params = {**params, **kwargs, "stream": True}
579-
params["stream_options"] = self.stream_options
585+
if self.stream_options is not None:
586+
params["stream_options"] = self.stream_options
587+
elif self._is_openai():
588+
params["stream_options"] = {"include_usage": True}
580589
default_chunk_class = AIMessageChunk
581590

582591
for chunk in self.completion_with_retry(
@@ -632,7 +641,10 @@ async def _astream(
632641
) -> AsyncIterator[ChatGenerationChunk]:
633642
message_dicts, params = self._create_message_dicts(messages, stop)
634643
params = {**params, **kwargs, "stream": True}
635-
params["stream_options"] = self.stream_options
644+
if self.stream_options is not None:
645+
params["stream_options"] = self.stream_options
646+
elif self._is_openai():
647+
params["stream_options"] = {"include_usage": True}
636648
default_chunk_class = AIMessageChunk
637649

638650
async for chunk in await self.acompletion_with_retry(

0 commit comments

Comments
 (0)