BerriAI
diff --git a/litellm/proxy/client/chat.py b/litellm/proxy/client/chat.py
Lines changed: 92 additions & 1 deletion
diff --git a/litellm/proxy/client/cli/commands/auth.py b/litellm/proxy/client/cli/commands/auth.py
Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,8 @@
+import json
+from typing import Any, Dict, Iterator, List, Optional, Union
+
 import requests
-from typing import List, Dict, Any, Optional, Union
+
 from .exceptions import UnauthorizedError
 
 
@@ -99,3 +102,91 @@ def completions(
             if e.response.status_code == 401:
                 raise UnauthorizedError(e)
             raise
+
+    def completions_stream(
+        self,
+        model: str,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        n: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        user: Optional[str] = None,
+    ) -> Iterator[Dict[str, Any]]:
+        """
+        Create a streaming chat completion.
+
+        This is a generator: the request is sent, and errors are raised, only once
+        iteration starts. The response and session are closed when it finishes.
+
+        Args:
+            model (str): The model to use for completion
+            messages (List[Dict[str, str]]): The messages to generate a completion for
+            temperature (Optional[float]): Sampling temperature between 0 and 2
+            top_p (Optional[float]): Nucleus sampling parameter between 0 and 1
+            n (Optional[int]): Number of completions to generate
+            max_tokens (Optional[int]): Maximum number of tokens to generate
+            presence_penalty (Optional[float]): Presence penalty between -2.0 and 2.0
+            frequency_penalty (Optional[float]): Frequency penalty between -2.0 and 2.0
+            user (Optional[str]): Unique identifier for the end user
+
+        Yields:
+            Dict[str, Any]: Streaming response chunks from the server
+
+        Raises:
+            UnauthorizedError: If the request fails with a 401 status code
+            requests.exceptions.RequestException: If the request fails with any other error
+        """
+        url = f"{self._base_url}/chat/completions"
+
+        # Required fields; "stream" asks the server for an SSE response.
+        data: Dict[str, Any] = {"model": model, "messages": messages, "stream": True}
+
+        # Only forward the optional parameters the caller actually set.
+        optional_params = {
+            "temperature": temperature,
+            "top_p": top_p,
+            "n": n,
+            "max_tokens": max_tokens,
+            "presence_penalty": presence_penalty,
+            "frequency_penalty": frequency_penalty,
+            "user": user,
+        }
+        data.update({k: v for k, v in optional_params.items() if v is not None})
+
+        # Close response/session in ``finally`` so nothing leaks on early exit.
+        session = requests.Session()
+        response = None
+        try:
+            response = session.post(
+                url,
+                headers=self._get_headers(),
+                json=data,
+                stream=True,
+            )
+            response.raise_for_status()
+            # Parse the SSE stream line by line.
+            for raw_line in response.iter_lines():
+                if not raw_line:
+                    continue  # blank lines separate SSE events
+                line = raw_line.decode("utf-8")
+                if not line.startswith("data:"):  # space after the colon is optional
+                    continue
+                data_str = line[len("data:") :].strip()
+                if data_str == "[DONE]":
+                    break
+                try:
+                    chunk = json.loads(data_str)
+                except json.JSONDecodeError:
+                    continue  # best-effort: skip payloads that are not valid JSON
+                yield chunk
+        except requests.exceptions.HTTPError as e:
+            if e.response.status_code == 401:
+                raise UnauthorizedError(e)
+            raise
+        finally:
+            if response is not None:
+                response.close()
+            session.close()
@@ -281,12 +281,12 @@ def prompt_team_selection_fallback(teams: List[Dict[str, Any]]) -> Optional[Dict
 
 def update_key_with_team(base_url: str, api_key: str, team_id: str) -> bool:
     """Update the API key to be associated with the selected team"""
-
+    from litellm.proxy._types import SpecialModelNames
     from litellm.proxy.client import Client
 
     client = Client(base_url=base_url, api_key=api_key)
     try:
-        client.keys.update(key=api_key, team_id=team_id)
+        client.keys.update(key=api_key, team_id=team_id, models=[SpecialModelNames.all_team_models.value])
         click.echo(f"✅ Successfully assigned key to team: {team_id}")
         return True
     except requests.exceptions.HTTPError as e: