93 changes: 65 additions & 28 deletions src/praisonai-agents/praisonaiagents/agent/agent.py
@@ -169,11 +169,70 @@ def _is_file_path(value: str) -> bool:
# Applied even when context management is disabled to prevent runaway tool outputs
DEFAULT_TOOL_OUTPUT_LIMIT = 16000

# Global variables for API server (protected by _server_lock for thread safety)
_server_lock = threading.Lock()
_server_started = {} # Dict of port -> started boolean
_registered_agents = {} # Dict of port -> Dict of path -> agent_id
_shared_apps = {} # Dict of port -> FastAPI app
class ServerRegistry:
"""Registry for API server state per-port."""

def __init__(self):
self._lock = threading.Lock()
self._server_started = {} # Dict of port -> started boolean
self._registered_agents = {} # Dict of port -> Dict of path -> agent_id
self._shared_apps = {} # Dict of port -> FastAPI app

@staticmethod
def get_default_instance():
"""Get default global registry for backward compatibility."""
if not hasattr(ServerRegistry, '_default_instance'):
import threading
# Double-checked locking pattern for thread safety
with threading.Lock():
if not hasattr(ServerRegistry, '_default_instance'):
ServerRegistry._default_instance = ServerRegistry()
return ServerRegistry._default_instance
Comment on lines +182 to +190

medium

The lazy initialization of the default ServerRegistry instance is not thread-safe. In a multi-agent environment where multiple agents might be initialized concurrently, this could lead to a race condition where multiple registry instances are created, potentially causing registration data to be lost or inconsistent. Since this PR specifically targets 'multi-agent safety', it is recommended to use a thread-safe singleton pattern or initialize the default instance at the module level.
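
A minimal sketch of the thread-safe pattern this comment suggests, with the lock created at module import time so every thread contends on the same lock object (class and method names mirror the PR's ServerRegistry for illustration; this is not the shipped code):

```python
import threading

class ServerRegistry:
    """Sketch of a race-free default-instance accessor. The lock is created
    at class definition time, so every thread contends on the same lock
    object (unlike a Lock() constructed inside the accessor, which each
    caller would create and acquire privately)."""

    _default_lock = threading.Lock()  # one lock, created before any threads run
    _default_instance = None

    @classmethod
    def get_default_instance(cls):
        if cls._default_instance is None:          # fast path, no lock held
            with cls._default_lock:
                if cls._default_instance is None:  # re-check under the lock
                    cls._default_instance = cls()
        return cls._default_instance
```

The fast path avoids lock contention on every call after the first; the re-check under the lock closes the window where two threads both pass the unlocked check.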


def is_server_started(self, port: int) -> bool:
with self._lock:
return self._server_started.get(port, False)

def set_server_started(self, port: int, started: bool) -> None:
with self._lock:
self._server_started[port] = started

def get_shared_app(self, port: int):
with self._lock:
return self._shared_apps.get(port)

def set_shared_app(self, port: int, app) -> None:
with self._lock:
self._shared_apps[port] = app

def register_agent(self, port: int, path: str, agent_id: str) -> None:
with self._lock:
if port not in self._registered_agents:
self._registered_agents[port] = {}
self._registered_agents[port][path] = agent_id

def get_registered_agents(self, port: int) -> dict:
with self._lock:
return self._registered_agents.get(port, {}).copy()

def cleanup_agent_registrations(self, agent_id: str) -> None:
"""Remove all registrations for an agent ID and clean empty port state."""
with self._lock:
Comment on lines +172 to +220

Copilot AI Apr 11, 2026


This change removes the module-level server globals, but there are still references to _server_lock, _registered_agents, and _server_started later in agent.py (e.g., _cleanup_server_registrations) and in execution_mixin.py. As-is, those paths will raise NameError. Either update those code paths to use ServerRegistry (via _get_default_server_registry() or an injected instance), or provide backward-compatible module-level aliases that delegate to the default registry instance.

ports_to_clean = []
for port, path_dict in self._registered_agents.items():
paths_to_remove = [path for path, registered_id in path_dict.items() if registered_id == agent_id]
for path in paths_to_remove:
del path_dict[path]
if not path_dict:
ports_to_clean.append(port)

for port in ports_to_clean:
self._registered_agents.pop(port, None)
self._server_started.pop(port, None)
Comment on lines +218 to +231

P1 ServerRegistry disconnected from execution_mixin.py's server state

The PR's cleanup path (called from __del__) now calls _get_default_server_registry().cleanup_agent_registrations(self._agent_id). However, the actual HTTP server management code lives in execution_mixin.py::_launch_http_server, which uses module-level globals declared via:

global _server_started, _registered_agents, _shared_apps, _server_lock  # line 1033

Those names refer to execution_mixin.py's module namespace, not to agent.py's namespace (Python functions retain __globals__ pointing to the module where they are defined). None of those four globals are defined in execution_mixin.py, so _launch_http_server raises a NameError when called.

The ServerRegistry introduced here holds completely separate state — so even if the NameError were fixed, __del__-time cleanup would still operate on a registry that was never populated by _launch_http_server. The PR's claim of fixing "multi-agent unsafe singletons" is incomplete since the actual server-registration path is neither fixed nor connected to the new ServerRegistry.
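
One way to reconnect the two modules, per this comment, is a single registry instance defined once and imported by both agent.py and execution_mixin.py, so server state lives in one namespace instead of two. A hedged sketch (names are illustrative, not the actual PraisonAI API):

```python
import threading

class ServerRegistry:
    """Illustrative registry holding per-port server state behind one lock."""

    def __init__(self):
        self._lock = threading.Lock()
        self._registered_agents = {}  # port -> {path: agent_id}

    def register_agent(self, port, path, agent_id):
        with self._lock:
            self._registered_agents.setdefault(port, {})[path] = agent_id

    def cleanup_agent_registrations(self, agent_id):
        with self._lock:
            for port in list(self._registered_agents):
                paths = self._registered_agents[port]
                for path in [p for p, a in paths.items() if a == agent_id]:
                    del paths[path]
                if not paths:  # drop ports with no remaining registrations
                    del self._registered_agents[port]

# Single shared instance, created at import time. Both agent.py and
# execution_mixin.py would import this one name instead of each relying on
# module-level globals in their own namespaces.
default_registry = ServerRegistry()
```

With this shape, `_launch_http_server` and `__del__`-time cleanup operate on the same state, avoiding both the NameError and the disconnected-registry problem described above.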


# Backward compatibility - use default instance
def _get_default_server_registry() -> ServerRegistry:
return ServerRegistry.get_default_instance()

# Don't import FastAPI dependencies here - use lazy loading instead

@@ -4571,28 +4630,7 @@ def _cleanup_server_registrations(self) -> None:
return # No ID generated, nothing registered

try:
agent_id = self._agent_id
with _server_lock:
# Remove from _registered_agents
ports_to_clean = []
for port, path_dict in _registered_agents.items():
paths_to_remove = []
for path, registered_id in path_dict.items():
if registered_id == agent_id:
paths_to_remove.append(path)

for path in paths_to_remove:
del path_dict[path]

# If no paths left for this port, mark port for cleanup
if not path_dict:
ports_to_clean.append(port)

# Clean up empty port entries
for port in ports_to_clean:
_registered_agents.pop(port, None)
_server_started.pop(port, None)
# Note: We don't clean up _shared_apps here as other agents might be using them
_get_default_server_registry().cleanup_agent_registrations(self._agent_id)

except Exception as e:
import sys
@@ -4629,4 +4667,3 @@ def is_closed(self) -> bool:

def __str__(self):
return f"Agent(name='{self.name}', role='{self.role}', goal='{self.goal}')"

73 changes: 24 additions & 49 deletions src/praisonai-agents/praisonaiagents/agent/chat_mixin.py
@@ -536,9 +536,9 @@ def _chat_completion(self, messages, temperature=1.0, tools=None, stream=True, r
formatted_tools = self._format_tools_for_completion(tools)

try:
# NEW: Unified protocol dispatch path (Issue #1304)
# Check if unified dispatch is enabled (opt-in for backward compatibility)
if getattr(self, '_use_unified_llm_dispatch', False):
# NEW: Unified protocol dispatch path (Issue #1304, #1362)

P1 Unified dispatch silently enabled for all custom-LLM users

Changing the _use_unified_llm_dispatch default from False to True is a silent, potentially breaking behavior change for any caller that relies on the llm_instance.get_response(...) path.

Before this PR: agents with a custom llm_instance would always fall through to self.llm_instance.get_response(...) unless _use_unified_llm_dispatch was explicitly set to True. After this PR: those same agents now route through self._execute_unified_chat_completion(...) by default, which may not call get_response at all.

Anyone who set a custom llm_instance expecting its get_response method to be called will see different (and potentially broken) behavior without any config change or deprecation notice. The PR description calls this "backward compatible" but this specific change is not backward compatible for the custom-LLM path.

If the intent is to unify all paths, consider at minimum:

  • Adding a deprecation warning when falling back from the unified path to get_response
  • Or keeping False as the default and documenting how users can opt in
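
The first suggestion could look roughly like this sketch (method names are hypothetical, not the actual chat_mixin.py code): keep False as the default, but warn callers still on the legacy path.

```python
import warnings

class LegacyDispatchMixin:
    """Hypothetical sketch of the reviewer's first option: keep False as the
    default for _use_unified_llm_dispatch, but emit a DeprecationWarning when
    falling back to the legacy llm_instance.get_response path."""

    def _chat_dispatch(self, messages, **kwargs):
        if getattr(self, '_use_unified_llm_dispatch', False):
            return self._execute_unified_chat_completion(messages, **kwargs)
        warnings.warn(
            "The llm_instance.get_response path is deprecated; set "
            "_use_unified_llm_dispatch=True to opt in to unified dispatch.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.llm_instance.get_response(prompt=messages, **kwargs)
```

This preserves existing behavior for custom-LLM users while giving them a release cycle to migrate before the default flips.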

# Enable unified dispatch by default for DRY and feature parity
if getattr(self, '_use_unified_llm_dispatch', True):
# Use composition instead of runtime class mutation for safety
Comment on lines +539 to 542

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check test expectations for unified dispatch default
rg -n '_use_unified_llm_dispatch' --type py -C 3

Repository: MervinPraison/PraisonAI

Length of output: 3326


🏁 Script executed:

#!/bin/bash
# Check if test file has been updated or if there are skip markers
cat -n src/praisonai-agents/test_unified_llm.py | head -50

Repository: MervinPraison/PraisonAI

Length of output: 2420


🏁 Script executed:

#!/bin/bash
# Look for CHANGELOG, migration docs, or PR notes
find . -type f \( -name "CHANGELOG*" -o -name "MIGRATION*" -o -name "*.md" \) | head -20 | xargs rg -l "unified.*dispatch" 2>/dev/null || echo "No migration docs found"

Repository: MervinPraison/PraisonAI

Length of output: 90


🏁 Script executed:

#!/bin/bash
# Check if there are any comments in chat_mixin.py explaining this change
sed -n '535,550p' src/praisonai-agents/praisonaiagents/agent/chat_mixin.py
sed -n '1835,1850p' src/praisonai-agents/praisonaiagents/agent/chat_mixin.py

Repository: MervinPraison/PraisonAI

Length of output: 1895


Unified dispatch default change breaks backward compatibility test—requires migration or revert.

Both sync (line 541) and async (line 1839) now default _use_unified_llm_dispatch to True, achieving sync/async parity. However, this introduces a breaking change: the backward compatibility test at src/praisonai-agents/test_unified_llm.py:36 explicitly expects the default to be False:

assert getattr(agent, '_use_unified_llm_dispatch', False) == False

This test will fail with the current code. Additionally, no migration documentation or CHANGELOG entry exists for this behavioral shift. Either:

  1. Revert the default to False and explicitly enable unified dispatch only when opted in, or
  2. Update the test and add migration documentation explaining the default behavior change and providing guidance for users who relied on the old behavior.
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/praisonai-agents/praisonaiagents/agent/chat_mixin.py` around lines 539 -
542, The change sets the attribute _use_unified_llm_dispatch to True by default
(in chat_mixin.py sync path and async path), which breaks an existing
backward-compatibility test; either revert the default to False in both the sync
and async dispatch paths (ensure getattr(self, '_use_unified_llm_dispatch',
False) remains False unless explicitly enabled) or update the test to expect
True and add a migration note/CHANGELOG entry explaining the new default and how
to opt out (reference the attribute _use_unified_llm_dispatch and the unified
dispatch code paths in chat_mixin.py to make the change consistently in
both sync and async branches).

final_response = self._execute_unified_chat_completion(
messages=messages,
@@ -579,50 +579,25 @@ def _chat_completion(self, messages, temperature=1.0, tools=None, stream=True, r
reasoning_steps=reasoning_steps
)
else:
# Non-streaming with custom LLM - don't show streaming-like behavior
if False: # Don't use display_generating when stream=False to avoid streaming-like behavior
# This block is disabled to maintain consistency with the OpenAI path fix
with _get_live()(
_get_display_functions()['display_generating']("", start_time),
console=self.console,
refresh_per_second=4,
) as live:
final_response = self.llm_instance.get_response(
prompt=messages[1:],
system_prompt=messages[0]['content'] if messages and messages[0]['role'] == 'system' else None,
temperature=temperature,
tools=formatted_tools if formatted_tools else None,
verbose=self.verbose,
markdown=self.markdown,
stream=stream,
console=self.console,
execute_tool_fn=self.execute_tool,
agent_name=self.name,
agent_role=self.role,
agent_tools=[getattr(t, '__name__', str(t)) for t in self.tools] if self.tools else None,
task_name=task_name,
task_description=task_description,
task_id=task_id,
reasoning_steps=reasoning_steps
)
else:
final_response = self.llm_instance.get_response(
prompt=messages[1:],
system_prompt=messages[0]['content'] if messages and messages[0]['role'] == 'system' else None,
temperature=temperature,
tools=formatted_tools if formatted_tools else None,
verbose=self.verbose,
markdown=self.markdown,
stream=stream,
console=self.console,
execute_tool_fn=self.execute_tool,
agent_name=self.name,
agent_role=self.role,
agent_tools=[getattr(t, '__name__', str(t)) for t in self.tools] if self.tools else None,
task_name=task_name,
task_description=task_description,
task_id=task_id,
reasoning_steps=reasoning_steps
# Non-streaming with custom LLM - direct execution
has_system = bool(messages and messages[0].get('role') == 'system')
final_response = self.llm_instance.get_response(
prompt=messages[1:] if has_system else messages,
system_prompt=messages[0]['content'] if has_system else None,
temperature=temperature,
tools=formatted_tools if formatted_tools else None,
verbose=self.verbose,
markdown=self.markdown,
stream=stream,
console=self.console,
execute_tool_fn=self.execute_tool,
agent_name=self.name,
agent_role=self.role,
agent_tools=[getattr(t, '__name__', str(t)) for t in self.tools] if self.tools else None,
task_name=task_name,
task_description=task_description,
task_id=task_id,
reasoning_steps=reasoning_steps
)
else:
# Use the standard OpenAI client approach with tool support
@@ -1860,8 +1835,8 @@ async def _achat_impl(self, prompt, temperature, tools, output_json, output_pyda
formatted_tools = self._format_tools_for_completion(tools)

# NEW: Unified protocol dispatch path (Issue #1304) - Async version
# Check if unified dispatch is enabled (opt-in for backward compatibility)
if getattr(self, '_use_unified_llm_dispatch', False):
# Enable unified dispatch by default for DRY and feature parity (sync/async consistent)
if getattr(self, '_use_unified_llm_dispatch', True):
# Use composition instead of runtime class mutation for safety
response = await self._execute_unified_achat_completion(
messages=messages,
23 changes: 16 additions & 7 deletions src/praisonai-agents/praisonaiagents/knowledge/index.py
@@ -101,13 +101,22 @@ def stats(self) -> IndexStats:
class IndexRegistry:
"""Registry for index types."""

_instance: Optional["IndexRegistry"] = None
def __init__(self):
"""Initialize a new registry instance."""
self._indices: Dict[str, Callable[..., IndexProtocol]] = {}

def __new__(cls) -> "IndexRegistry":
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._indices: Dict[str, Callable[..., IndexProtocol]] = {}
return cls._instance
@classmethod
def default(cls) -> "IndexRegistry":
"""Get a default global registry instance for convenience."""
if not hasattr(cls, '_default_instance'):
import threading
# Use lock for thread safety
if not hasattr(cls, '_init_lock'):
cls._init_lock = threading.Lock()
with cls._init_lock:
if not hasattr(cls, '_default_instance'):
cls._default_instance = cls()
Comment on lines +111 to +118

P1 Race condition in lock initialization

The _init_lock is created inside the outer hasattr guard but before it is acquired, so two threads can both enter the outer block, both find _init_lock absent, and each assign a different Lock object to cls._init_lock. If Thread A evaluates with cls._init_lock: after writing Lock_A but before Thread B overwrites it with Lock_B, the two threads end up holding different locks and both proceed to create _default_instance simultaneously.

This is the same TOCTOU pattern seen in ServerRegistry.get_default_instance() and get_default_memory_registry(), but here the lock is persistent (class-level) rather than thrown away each call. A safe pattern initialises the lock at class definition time:

class IndexRegistry:
    _init_lock: threading.Lock = threading.Lock()  # module-load time, no race

    @classmethod
    def default(cls) -> "IndexRegistry":
        if not hasattr(cls, '_default_instance'):
            with cls._init_lock:
                if not hasattr(cls, '_default_instance'):
                    cls._default_instance = cls()
        return cls._default_instance

return cls._default_instance
Comment on lines +109 to +119

medium

The default class method in IndexRegistry is not thread-safe. Concurrent access during the first call could result in multiple instances of the registry being created. Consider using a lock or module-level initialization for the default instance to ensure consistency in multi-agent scenarios.

Comment on lines +108 to +119

Copilot AI Apr 11, 2026


IndexRegistry is no longer a singleton (removed __new__), but get_index_registry() elsewhere in this module still returns IndexRegistry() which creates a fresh, empty registry each call. That means the default registrations done at import time won’t be visible to later callers. To preserve the previous behavior, get_index_registry() should return IndexRegistry.default() (or reintroduce singleton semantics).

Comment on lines +108 to +119

⚠️ Potential issue | 🟠 Major

Same race condition—define the lock at class level.

The lazy _init_lock creation suffers from the same race as the memory registry: two threads can create separate locks. For a classmethod, define the lock as a class attribute at class definition time.

🔒 Thread-safe classmethod singleton
 class IndexRegistry:
     """Registry for index types."""
+    _init_lock: "threading.Lock"  # defined below after import
     
     def __init__(self):
         """Initialize a new registry instance."""
         self._indices: Dict[str, Callable[..., IndexProtocol]] = {}
     
     @classmethod
     def default(cls) -> "IndexRegistry":
         """Get a default global registry instance for convenience."""
-        if not hasattr(cls, '_default_instance'):
-            import threading
-            # Use lock for thread safety 
-            if not hasattr(cls, '_init_lock'):
-                cls._init_lock = threading.Lock()
-            with cls._init_lock:
-                if not hasattr(cls, '_default_instance'):
-                    cls._default_instance = cls()
-        return cls._default_instance
+        if not hasattr(cls, '_default_instance'):
+            with cls._init_lock:
+                if not hasattr(cls, '_default_instance'):
+                    cls._default_instance = cls()
+        return cls._default_instance

Then after the class definition, initialize the lock:

import threading
IndexRegistry._init_lock = threading.Lock()

Alternatively, use a module-level @lru_cache helper that default() delegates to—consistent with the memory registry pattern if you adopt that fix.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/praisonai-agents/praisonaiagents/knowledge/index.py` around lines 108 -
119, The lazy creation of _init_lock in IndexRegistry.default() can race;
instead define a single class-level lock and use it in default(): add a class
attribute _init_lock initialized to a threading.Lock() at class definition time
(or immediately after the class is defined) so IndexRegistry.default() only
acquires the pre-existing _init_lock before checking/creating _default_instance;
update references in default() to rely on the class attribute and remove the
lazy hasattr-based lock creation.
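
The module-level `@lru_cache` alternative mentioned above could look like this sketch (the `IndexRegistry` here is a bare stand-in so the snippet runs standalone). One caveat worth noting: lru_cache protects the cache itself, but two threads racing on the very first miss can each run the factory, so the class-level-lock pattern is the stricter guarantee.

```python
from functools import lru_cache

class IndexRegistry:
    """Bare-bones stand-in so the sketch runs standalone."""
    def __init__(self):
        self._indices = {}

@lru_cache(maxsize=None)
def _default_index_registry():
    # Repeat callers always converge on the cached instance. Caveat: two
    # threads racing on the first miss can each execute this factory and
    # briefly hold different objects before the cache settles.
    return IndexRegistry()

def get_index_registry():
    """Delegate to the cached default, preserving singleton-like behavior."""
    return _default_index_registry()
```

This keeps `get_index_registry()` returning the same populated registry across calls, which is what the import-time default registrations depend on.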


def register(self, name: str, factory: Callable[..., IndexProtocol]) -> None:
"""Register an index factory."""
@@ -133,7 +142,7 @@ def clear(self) -> None:

def get_index_registry() -> IndexRegistry:
"""Get the global index registry instance."""
return IndexRegistry()
return IndexRegistry.default()

class KeywordIndex:
"""
62 changes: 58 additions & 4 deletions src/praisonai-agents/praisonaiagents/llm/adapters/__init__.py
@@ -57,6 +57,25 @@ def handle_empty_response_with_tools(self, state: Dict[str, Any]) -> bool:

def get_default_settings(self) -> Dict[str, Any]:
return {} # No provider-specific defaults

def parse_tool_calls(self, raw_response: Dict[str, Any]) -> Optional[List[Dict[str, Any]]]:
"""Default tool call parsing - use OpenAI-style format."""
if "choices" in raw_response and len(raw_response["choices"]) > 0:
message = raw_response["choices"][0].get("message", {})
return message.get("tool_calls")
return None

def should_skip_streaming_with_tools(self) -> bool:
return False # Most providers support streaming with tools

def recover_tool_calls_from_text(self, response_text: str, tools: List[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]:
return None # No text recovery by default

def inject_cache_control(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
return messages # No cache control by default

def extract_reasoning_tokens(self, response: Dict[str, Any]) -> int:
return 0 # No reasoning tokens by default


class OllamaAdapter(DefaultAdapter):
@@ -99,6 +118,37 @@ def handle_empty_response_with_tools(self, state: Dict[str, Any]) -> bool:
return True # Signal that special handling is needed
return False

def recover_tool_calls_from_text(self, response_text: str, tools: List[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]:
"""Ollama-specific tool call recovery from response text."""
if not response_text or not tools:
return None

try:
import json
response_json = json.loads(response_text.strip())

# Normalize to list so both single and multi-tool payloads are supported
if isinstance(response_json, dict):
response_json = [response_json]

if isinstance(response_json, list):
tool_calls: List[Dict[str, Any]] = []
for idx, tool_json in enumerate(response_json):
if isinstance(tool_json, dict) and "name" in tool_json:
tool_calls.append({
"id": f"call_{tool_json['name']}_{idx}_{hash(response_text) % 10000}",
"type": "function",
"function": {
"name": tool_json["name"],
"arguments": json.dumps(tool_json.get("arguments", {}))
}
})
return tool_calls if tool_calls else None
except (json.JSONDecodeError, TypeError, KeyError):
pass

return None
Comment on lines +121 to +150

medium

Using hash(response_text) % 10000 to generate a tool call ID is problematic. Python's hash() is randomized across processes by default, which can lead to inconsistent IDs if state is persisted and resumed. Additionally, the small modulo space (10,000) increases the risk of ID collisions. Using a UUID or a unique counter would be more robust for generating these identifiers.

    def recover_tool_calls_from_text(self, response_text: str, tools: List[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]:
        """Ollama-specific tool call recovery from response text."""
        if not response_text or not tools:
            return None
        
        try:
            import json
            import uuid
            response_json = json.loads(response_text.strip())
            if isinstance(response_json, dict) and "name" in response_json:
                # Convert Ollama format to standard tool_calls format
                return [{
                    "id": f"call_{response_json['name']}_{uuid.uuid4().hex[:8]}",
                    "type": "function",
                    "function": {
                        "name": response_json["name"],
                        "arguments": json.dumps(response_json.get("arguments", {}))
                    }
                }]
        except (json.JSONDecodeError, TypeError, KeyError):
            pass
        
        return None


def post_tool_iteration(self, state: Dict[str, Any]) -> None:
# Replaces: Ollama-specific post-tool summary branches
if (not state.get('response_text', '').strip() and
Expand Down Expand Up @@ -142,6 +192,13 @@ class GeminiAdapter(DefaultAdapter):
- Supports structured output
"""

def should_skip_streaming_with_tools(self) -> bool:
"""Gemini should skip streaming when tools are present."""
return True

def supports_structured_output(self) -> bool:
return True

Comment on lines +195 to +201

Copilot AI Apr 11, 2026


GeminiAdapter defines supports_structured_output() twice (once near the top of the class and again later). This duplication is easy to miss and can cause confusion about which implementation is intended. Remove the duplicate method and keep a single definition.

def format_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
# Replaces: gemini_internal_tools handling in llm.py
# Internal tool names match GEMINI_INTERNAL_TOOLS: {'googleSearch', 'urlContext', 'codeExecution'}
@@ -160,9 +217,6 @@ def format_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
def supports_streaming_with_tools(self) -> bool:
# Gemini has issues with streaming + tools
return False

def supports_structured_output(self) -> bool:
return True


# Provider adapter registry - public for extension
@@ -236,4 +290,4 @@ def has_provider_adapter(name: str) -> bool:
'add_provider_adapter',
'list_provider_adapters',
'has_provider_adapter',
]
]
20 changes: 20 additions & 0 deletions src/praisonai-agents/praisonaiagents/llm/protocols.py
@@ -326,6 +326,26 @@ def handle_empty_response_with_tools(self, state: Dict[str, Any]) -> bool:
def get_default_settings(self) -> Dict[str, Any]:
"""Get provider-specific default settings."""
...

def parse_tool_calls(self, raw_response: Dict[str, Any]) -> Optional[List[Dict[str, Any]]]:
"""Parse tool calls from provider-specific response format."""
...

def should_skip_streaming_with_tools(self) -> bool:
"""Check if provider should skip streaming when tools are present."""
...

def recover_tool_calls_from_text(self, response_text: str, tools: List[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]:
"""Attempt to recover tool calls from response text for providers that don't format them properly."""
...

def inject_cache_control(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Inject provider-specific cache control headers."""
...

def extract_reasoning_tokens(self, response: Dict[str, Any]) -> int:
"""Extract reasoning token count from provider-specific response."""
...
Comment on lines +330 to +348

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify call sites for new adapter protocol hooks in core LLM paths.
rg -nP --type=py '\._provider_adapter\.(parse_tool_calls|should_skip_streaming_with_tools|recover_tool_calls_from_text|inject_cache_control|extract_reasoning_tokens)\s*\(' src/praisonai-agents/praisonaiagents/llm

# Broader scan for direct hook invocations (non _provider_adapter variable names)
rg -nP --type=py '\b(parse_tool_calls|should_skip_streaming_with_tools|recover_tool_calls_from_text|inject_cache_control|extract_reasoning_tokens)\s*\(' src/praisonai-agents/praisonaiagents/llm

Repository: MervinPraison/PraisonAI

Length of output: 1884


🏁 Script executed:

# Check the llm.py file structure and find tool/stream/cache related code
wc -l src/praisonai-agents/praisonaiagents/llm/llm.py

Repository: MervinPraison/PraisonAI

Length of output: 119


🏁 Script executed:

# Search for tool call parsing logic
rg -n "parse_tool|tool_call" src/praisonai-agents/praisonaiagents/llm/llm.py

Repository: MervinPraison/PraisonAI

Length of output: 16571


🏁 Script executed:

# Search for streaming decision logic
rg -n "stream|skip" src/praisonai-agents/praisonaiagents/llm/llm.py | head -20

Repository: MervinPraison/PraisonAI

Length of output: 1619


🏁 Script executed:

# Search for cache control logic
rg -n "cache|inject" src/praisonai-agents/praisonaiagents/llm/llm.py

Repository: MervinPraison/PraisonAI

Length of output: 1234


🏁 Script executed:

# Search for reasoning token extraction
rg -n "reasoning|token" src/praisonai-agents/praisonaiagents/llm/llm.py

Repository: MervinPraison/PraisonAI

Length of output: 15344


🏁 Script executed:

# Check what methods are actually called on _provider_adapter in llm.py
rg -n "_provider_adapter\." src/praisonai-agents/praisonaiagents/llm/llm.py

Repository: MervinPraison/PraisonAI

Length of output: 783


🏁 Script executed:

# Check the adapter implementations for these new methods
cat -n src/praisonai-agents/praisonaiagents/llm/adapters/__init__.py | sed -n '60,85p'

Repository: MervinPraison/PraisonAI

Length of output: 1345


These new adapter hooks are not wired into runtime dispatch—they create dead API surface.

The five protocol methods (parse_tool_calls, should_skip_streaming_with_tools, recover_tool_calls_from_text, inject_cache_control, extract_reasoning_tokens) are defined in the protocol and have base implementations in DefaultAdapter, but searching the LLM dispatch paths finds zero invocations of any of them. Instead, duplicate logic is implemented inline:

  • Tool call parsing uses _try_parse_tool_call_json() (line 1102) rather than the adapter hook
  • Cache control is hardcoded for Anthropic (lines 1440–1447) rather than delegated to inject_cache_control()
  • Reasoning token extraction reads the response directly (lines 4297–4303) rather than calling extract_reasoning_tokens()
  • Text-based recovery has no adapter call path

This violates the DRY principle and prevents providers from actually hooking these extension points. Wire these methods into their corresponding call sites in the chat/stream paths, or remove them if they're not needed.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/praisonai-agents/praisonaiagents/llm/protocols.py` around lines 330 -
348, The protocol hooks (parse_tool_calls, should_skip_streaming_with_tools,
recover_tool_calls_from_text, inject_cache_control, extract_reasoning_tokens)
are unused—replace the inline/duplicated logic with calls into the adapter
methods: where _try_parse_tool_call_json() is used (e.g., tool parsing at or
around _try_parse_tool_call_json), call adapter.parse_tool_calls(raw_response)
and if that returns None fall back to
adapter.recover_tool_calls_from_text(response_text, tools); replace the
Anthropic-specific cache control code with
adapter.inject_cache_control(messages) so providers can override cache headers;
use adapter.should_skip_streaming_with_tools() to decide streaming when tools
are present; and replace direct reasoning-token reads with
adapter.extract_reasoning_tokens(response) so providers control token
accounting; keep DefaultAdapter implementations as fallbacks.
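
A sketch of the suggested wiring: structured parsing first, then text recovery as a fallback. The stub adapter below is a minimal stand-in so the snippet runs standalone; the real adapters live in llm/adapters/__init__.py, and the dispatch function name here is illustrative.

```python
import json
from typing import Any, Dict, List, Optional

class StubAdapter:
    """Minimal stand-in for DefaultAdapter so the sketch runs standalone."""

    def parse_tool_calls(self, raw_response: Dict[str, Any]) -> Optional[List[Dict[str, Any]]]:
        # OpenAI-style structured tool calls, mirroring DefaultAdapter above.
        if raw_response.get("choices"):
            return raw_response["choices"][0].get("message", {}).get("tool_calls")
        return None

    def recover_tool_calls_from_text(self, response_text: str,
                                     tools: List[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]:
        # Text-based recovery for providers that emit tool calls as raw JSON.
        try:
            payload = json.loads(response_text)
        except json.JSONDecodeError:
            return None
        if isinstance(payload, dict) and "name" in payload:
            return [{"type": "function",
                     "function": {"name": payload["name"],
                                  "arguments": json.dumps(payload.get("arguments", {}))}}]
        return None

def resolve_tool_calls(adapter, raw_response, response_text, tools):
    """Dispatch-side hook usage: structured parse first, then text recovery."""
    calls = adapter.parse_tool_calls(raw_response)
    if calls is None and response_text:
        calls = adapter.recover_tool_calls_from_text(response_text, tools)
    return calls
```

Routing the dispatch path through a helper like this lets provider adapters actually override the hooks, instead of the inline duplicates the review identifies.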



@runtime_checkable