Skip to content

Commit ded91de

Browse files
committed
streaming should be part of model capabilities
1 parent c51afdf commit ded91de

File tree

10 files changed

+44
-54
lines changed

10 files changed

+44
-54
lines changed

AgentCrew/modules/custom_llm/deepinfra_service.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ def __init__(self):
1414
api_key=api_key,
1515
base_url="https://api.deepinfra.com/v1/openai",
1616
provider_name="deepinfra",
17-
is_stream=True,
1817
)
1918
self.model = "Qwen/Qwen3-235B-A22B"
2019
self.current_input_tokens = 0

AgentCrew/modules/custom_llm/github_copilot_service.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ def __init__(self):
1616
api_key=api_key,
1717
base_url="https://api.githubcopilot.com",
1818
provider_name="github_copilot",
19-
is_stream=True,
2019
extra_headers={
2120
"Copilot-Integration-Id": "vscode-chat",
2221
"Editor-Plugin-Version": "CopilotChat.nvim/*",

AgentCrew/modules/custom_llm/service.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ def __init__(
1616
base_url: str,
1717
api_key: str,
1818
provider_name: str,
19-
is_stream: bool = False,
2019
extra_headers: Optional[Dict[str, str]] = None,
2120
):
2221
"""
@@ -34,7 +33,6 @@ def __init__(
3433
logger.info(
3534
f"Initialized Custom LLM Service for provider: {provider_name} at {base_url}"
3635
)
37-
self._is_stream = is_stream
3836
self.extra_headers = extra_headers
3937

4038
def format_tool_result(
@@ -168,7 +166,9 @@ async def stream_assistant_response(self, messages):
168166
if self.reasoning_effort is None:
169167
stream_params["reasoning_effort"] = "none"
170168

171-
if self._is_stream:
169+
if "stream" in ModelRegistry.get_model_capabilities(
170+
f"{self._provider_name}/{self.model}"
171+
):
172172
self._is_thinking = False
173173
return await self.client.chat.completions.create(
174174
**stream_params,
@@ -196,7 +196,9 @@ async def stream_assistant_response(self, messages):
196196
def process_stream_chunk(
197197
self, chunk, assistant_response: str, tool_uses: List[Dict]
198198
) -> Tuple[str, List[Dict], int, int, Optional[str], Optional[tuple]]:
199-
if self._is_stream:
199+
if "stream" in ModelRegistry.get_model_capabilities(
200+
f"{self._provider_name}/{self.model}"
201+
):
200202
return self._process_stream_chunk(chunk, assistant_response, tool_uses)
201203
else:
202204
return self._process_non_stream_chunk(chunk, assistant_response, tool_uses)

AgentCrew/modules/groq/service.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ def __init__(self):
3838
self.current_output_tokens = 0
3939
self.system_prompt = ""
4040
self.temperature = 0.4
41-
self._is_stream = False
4241
logger.info("Initialized Groq Service")
4342

4443
def set_think(self, budget_tokens) -> bool:

AgentCrew/modules/gui/components/message_handlers.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ def __init__(self, chat_window):
99

1010
if isinstance(chat_window, ChatWindow):
1111
self.chat_window = chat_window
12-
self.thinking_content = ""
12+
self.chat_window.thinking_content = ""
1313
self.chunk_buffer_queue = []
1414
self.think_buffer_queue = []
1515

@@ -85,13 +85,12 @@ def handle_thinking_started(self, data):
8585
self.chat_window.current_thinking_bubble = (
8686
self.chat_window.chat_components.append_thinking_message("", agent_name)
8787
)
88-
self.thinking_content = "" # Initialize thinking content
89-
# self.chat_window.thinking_buffer = "" # Initialize thinking buffer
88+
self.chat_window.thinking_content = "" # Initialize thinking content
9089

9190
def handle_thinking_chunk(self, chunk):
9291
"""Handle a chunk of the thinking process."""
9392
self.think_buffer_queue.extend(list(chunk))
94-
self.thinking_content += chunk
93+
self.chat_window.thinking_content += chunk
9594
# Use smooth streaming for thinking chunks too
9695
if self.chat_window.current_thinking_bubble:
9796
self.chat_window.current_thinking_bubble.add_streaming_chunk(
@@ -104,12 +103,13 @@ def handle_thinking_completed(self):
104103
# Finalize thinking stream if active
105104
if self.chat_window.current_thinking_bubble:
106105
self.chat_window.current_thinking_bubble.raw_text_buffer = (
107-
self.thinking_content
106+
self.chat_window.thinking_content
108107
)
109108
self.chat_window.current_thinking_bubble._finalize_streaming()
110109
# Reset thinking bubble reference
111110
self.think_buffer_queue = []
112111
self.chat_window.current_thinking_bubble = None
112+
self.chat_window.thinking_content = ""
113113

114114
def handle_user_context_request(self):
115115
"""Handle user context request."""

AgentCrew/modules/gui/components/tool_handlers.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,14 @@ def handle_tool_error(self, data: Dict):
8888
self.chat_window.current_response_bubble = None
8989
self.chat_window.current_response_container = None
9090

91+
if self.chat_window.current_thinking_bubble:
92+
self.chat_window.current_thinking_bubble.raw_text_buffer = (
93+
self.chat_window.thinking_content
94+
)
95+
self.chat_window.current_thinking_bubble._finalize_streaming()
96+
self.chat_window.current_thinking_bubble = None
97+
self.chat_window.thinking_content = ""
98+
9199
def handle_tool_confirmation_required(self, tool_info):
92100
"""Display a dialog for tool confirmation request."""
93101
tool_use = tool_info.copy()

AgentCrew/modules/gui/widgets/configs/custom_llm_provider.py

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def __init__(
2828
self,
2929
provider_name: str,
3030
model_data: Optional[Dict[str, Any]] = None,
31-
existing_model_ids: List[str] = None,
31+
existing_model_ids: Optional[List[str]] = None,
3232
parent=None,
3333
):
3434
super().__init__(parent)
@@ -61,6 +61,7 @@ def __init__(
6161
self.capabilities_tool_use_checkbox = QCheckBox("Tool Use")
6262
self.capabilities_thinking_checkbox = QCheckBox("Thinking")
6363
self.capabilities_vision_checkbox = QCheckBox("Vision")
64+
self.capabilities_stream_checkbox = QCheckBox("Stream")
6465

6566
self.input_price_edit = QDoubleSpinBox()
6667
self.input_price_edit.setDecimals(6) # Increased precision
@@ -83,6 +84,7 @@ def __init__(
8384
capabilities_layout.addWidget(self.capabilities_tool_use_checkbox)
8485
capabilities_layout.addWidget(self.capabilities_thinking_checkbox)
8586
capabilities_layout.addWidget(self.capabilities_vision_checkbox)
87+
capabilities_layout.addWidget(self.capabilities_stream_checkbox)
8688
capabilities_layout.addStretch() # To push checkboxes to the left
8789
form_layout.addRow("Capabilities:", capabilities_layout)
8890

@@ -115,6 +117,7 @@ def populate_fields(self, data: Dict[str, Any]):
115117
"thinking" in current_capabilities
116118
)
117119
self.capabilities_vision_checkbox.setChecked("vision" in current_capabilities)
120+
self.capabilities_stream_checkbox.setChecked("stream" in current_capabilities)
118121

119122
self.input_price_edit.setValue(data.get("input_token_price_1m", 0.0))
120123
self.output_price_edit.setValue(data.get("output_token_price_1m", 0.0))
@@ -130,11 +133,12 @@ def get_model_data(self) -> Dict[str, Any]:
130133
capabilities_list.append("thinking")
131134
if self.capabilities_vision_checkbox.isChecked():
132135
capabilities_list.append("vision")
136+
if self.capabilities_stream_checkbox.isChecked():
137+
capabilities_list.append("stream")
133138

134139
# Ensure Model Pydantic types are respected
135140
return {
136141
"id": self.id_edit.text().strip(),
137-
"provider": self.provider_name, # Name of the custom provider
138142
"name": self.name_edit.text().strip(),
139143
"description": self.description_edit.toPlainText().strip(),
140144
"capabilities": capabilities_list,
@@ -254,13 +258,11 @@ def clear_and_disable_form(self):
254258
self.api_base_url_edit.clear()
255259
self.api_key_edit.clear()
256260
self.default_model_id_edit.clear()
257-
self.is_stream_checkbox.setChecked(False)
258261

259262
self.name_edit.setEnabled(False)
260263
self.api_base_url_edit.setEnabled(False)
261264
self.api_key_edit.setEnabled(False)
262265
self.default_model_id_edit.setEnabled(False)
263-
self.is_stream_checkbox.setEnabled(False)
264266

265267
self.save_button.setEnabled(False)
266268
self.remove_button.setEnabled(False)
@@ -294,7 +296,6 @@ def on_provider_selected(self, current_item, previous_item):
294296
self.api_base_url_edit.setText(provider_data.get("api_base_url", ""))
295297
self.api_key_edit.setText(provider_data.get("api_key", ""))
296298
self.default_model_id_edit.setText(provider_data.get("default_model_id", ""))
297-
self.is_stream_checkbox.setChecked(provider_data.get("is_stream", False))
298299

299300
# Clear and reload header fields
300301
self.clear_header_fields()
@@ -311,7 +312,6 @@ def on_provider_selected(self, current_item, previous_item):
311312
self.api_base_url_edit.setEnabled(True)
312313
self.api_key_edit.setEnabled(True)
313314
self.default_model_id_edit.setEnabled(True)
314-
self.is_stream_checkbox.setEnabled(True)
315315

316316
self.save_button.setEnabled(True)
317317
self.remove_button.setEnabled(True)
@@ -358,9 +358,6 @@ def add_new_provider_triggered(self):
358358
self.default_model_id_edit.setEnabled(True)
359359
self.default_model_id_edit.clear()
360360

361-
self.is_stream_checkbox.setEnabled(True)
362-
self.is_stream_checkbox.setChecked(False)
363-
364361
# Clear existing headers and enable adding new ones
365362
self.clear_header_fields()
366363
self.add_header_button.setEnabled(True)
@@ -425,14 +422,12 @@ def init_ui(self):
425422
self.api_base_url_edit = QLineEdit()
426423
self.api_key_edit = QLineEdit()
427424
self.default_model_id_edit = QLineEdit()
428-
self.is_stream_checkbox = QCheckBox("Enable Streaming")
429425

430426
form_layout.addRow("Name:", self.name_edit)
431427
form_layout.addRow("Type:", self.type_display)
432428
form_layout.addRow("Base URL:", self.api_base_url_edit)
433429
form_layout.addRow("API Key:", self.api_key_edit)
434430
form_layout.addRow("Default Model ID:", self.default_model_id_edit)
435-
form_layout.addRow("Streaming:", self.is_stream_checkbox)
436431

437432
editor_layout.addLayout(form_layout)
438433

@@ -501,7 +496,6 @@ def init_ui(self):
501496
self.api_base_url_edit.setEnabled(False)
502497
self.api_key_edit.setEnabled(False)
503498
self.default_model_id_edit.setEnabled(False)
504-
self.is_stream_checkbox.setEnabled(False)
505499
self.save_button.setEnabled(False)
506500

507501
self.setLayout(main_layout)
@@ -608,7 +602,6 @@ def save_provider_details(self):
608602
api_base_url = self.api_base_url_edit.text().strip()
609603
api_key = self.api_key_edit.text().strip()
610604
default_model_id = self.default_model_id_edit.text().strip()
611-
is_stream = self.is_stream_checkbox.isChecked()
612605

613606
if not name or not api_base_url:
614607
QMessageBox.warning(
@@ -645,7 +638,6 @@ def save_provider_details(self):
645638
"api_key": api_key,
646639
"default_model_id": default_model_id,
647640
"available_models": available_models_data, # List of model dictionaries
648-
"is_stream": is_stream,
649641
"extra_headers": extra_headers, # Add the extra_headers field
650642
}
651643

AgentCrew/modules/llm/constants.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@
205205
provider="deepinfra",
206206
name="Llama 3.3 70B Instruct",
207207
description="Llama 3.3-70B is a multilingual LLM trained on a massive dataset of 15 trillion tokens, fine-tuned for instruction-following and conversational dialogue",
208-
capabilities=["tool_use", "text-generation"],
208+
capabilities=["tool_use", "text-generation", "stream"],
209209
input_token_price_1m=0.23,
210210
output_token_price_1m=0.40,
211211
),
@@ -214,7 +214,7 @@
214214
provider="deepinfra",
215215
name="Gemma 3 27B",
216216
description="Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models",
217-
capabilities=["text-generation", "tool_use"],
217+
capabilities=["text-generation", "tool_use", "stream"],
218218
input_token_price_1m=0.1,
219219
output_token_price_1m=0.2,
220220
),
@@ -241,7 +241,7 @@
241241
provider="deepinfra",
242242
name="Qwen 3 MoE 235B-22B",
243243
description="Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models",
244-
capabilities=["text-generation", "tool_use", "thinking"],
244+
capabilities=["text-generation", "tool_use", "thinking", "stream"],
245245
input_token_price_1m=0.2,
246246
output_token_price_1m=0.6,
247247
),
@@ -250,7 +250,7 @@
250250
provider="deepinfra",
251251
name="Qwen 3 32B",
252252
description="Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models",
253-
capabilities=["text-generation", "tool_use"],
253+
capabilities=["text-generation", "tool_use", "stream"],
254254
input_token_price_1m=0.1,
255255
output_token_price_1m=0.3,
256256
),
@@ -268,7 +268,7 @@
268268
provider="deepinfra",
269269
name="DeepSeek R1 0528",
270270
description="The DeepSeek R1 model has undergone a minor version upgrade, with the current version being DeepSeek-R1-0528.",
271-
capabilities=["text-generation", "tool_use", "thinking"],
271+
capabilities=["text-generation", "tool_use", "thinking", "stream"],
272272
input_token_price_1m=0.5,
273273
output_token_price_1m=2.18,
274274
),
@@ -279,7 +279,7 @@
279279
provider="github_copilot",
280280
name="claude-3.7-sonnet",
281281
description="",
282-
capabilities=["tool_use", "vision"],
282+
capabilities=["tool_use", "vision", "stream"],
283283
default=False,
284284
input_token_price_1m=0.0,
285285
output_token_price_1m=0.0,
@@ -289,7 +289,7 @@
289289
provider="github_copilot",
290290
name="gemini-2.5-pro-preview-05-06",
291291
description="",
292-
capabilities=["tool_use", "vision"],
292+
capabilities=["tool_use", "vision", "stream"],
293293
default=False,
294294
input_token_price_1m=0.0,
295295
output_token_price_1m=0.0,
@@ -299,7 +299,7 @@
299299
provider="github_copilot",
300300
name="claude-3.7-sonnet-thought",
301301
description="",
302-
capabilities=["tool_use", "thinking", "vision"],
302+
capabilities=["tool_use", "thinking", "vision", "stream"],
303303
default=False,
304304
input_token_price_1m=0.0,
305305
output_token_price_1m=0.0,
@@ -309,7 +309,7 @@
309309
provider="github_copilot",
310310
name="gpt-4.1",
311311
description="",
312-
capabilities=["tool_use", "vision"],
312+
capabilities=["tool_use", "vision", "stream"],
313313
default=True,
314314
input_token_price_1m=0.0,
315315
output_token_price_1m=0.0,
@@ -319,7 +319,7 @@
319319
provider="github_copilot",
320320
name="o4-mini",
321321
description="",
322-
capabilities=["tool_use", "thinking"],
322+
capabilities=["tool_use", "thinking", "stream"],
323323
default=False,
324324
input_token_price_1m=0.0,
325325
output_token_price_1m=0.0,
@@ -329,7 +329,7 @@
329329
provider="github_copilot",
330330
name="o1",
331331
description="",
332-
capabilities=["tool_use", "thinking", "vision"],
332+
capabilities=["tool_use", "thinking", "vision", "stream"],
333333
default=False,
334334
input_token_price_1m=0.0,
335335
output_token_price_1m=0.0,
@@ -339,7 +339,7 @@
339339
provider="github_copilot",
340340
name="claude-sonnet-4",
341341
description="",
342-
capabilities=["tool_use", "vision"],
342+
capabilities=["tool_use", "vision", "stream"],
343343
default=False,
344344
input_token_price_1m=0.0,
345345
output_token_price_1m=0.0,

AgentCrew/modules/llm/model_registry.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,13 @@ def _load_custom_models_from_config(self):
4747
provider_name = provider_config.get("name")
4848
for model_data_dict in provider_config.get("available_models", []):
4949
try:
50-
# Ensure provider name from the outer config is used if not in model_data_dict
51-
if "provider" not in model_data_dict or not model_data_dict.get(
52-
"provider"
53-
):
54-
if provider_name:
55-
model_data_dict["provider"] = provider_name
56-
else:
57-
print(
58-
f"Warning: Skipping model due to missing provider name in config: ID '{model_data_dict.get('id', 'N/A')}'"
59-
)
60-
continue
50+
if provider_name:
51+
model_data_dict["provider"] = provider_name
52+
else:
53+
print(
54+
f"Warning: Skipping model due to missing provider name in config: ID '{model_data_dict.get('id', 'N/A')}'"
55+
)
56+
continue
6157
model = Model(**model_data_dict)
6258
self.register_model(model)
6359
except Exception as e:

0 commit comments

Comments (0)