fix(llm): fix reasoning on custom llm

daltonnyx · daltonnyx · commit 6776defdf4d4 · 2025-11-24T22:27:43.000+07:00
diff --git a/AgentCrew/__init__.py b/AgentCrew/__init__.py
@@ -1 +1 @@
-__version__ = "0.8.4"
+__version__ = "0.8.5"
diff --git a/AgentCrew/modules/chat/message/handler.py b/AgentCrew/modules/chat/message/handler.py
@@ -388,9 +388,6 @@ def process_result(_tool_uses, _input_tokens, _output_tokens):
 
                 return await self.get_assistant_response()
 
-            if thinking_content:
-                self._notify("agent_continue", self.agent.name)
-
             # Add assistant response to messages
             if assistant_response.strip():
                 self._messages_append(
diff --git a/AgentCrew/modules/console/console_ui.py b/AgentCrew/modules/console/console_ui.py
@@ -20,7 +20,6 @@
     RICH_STYLE_GREEN,
     RICH_STYLE_BLUE,
     RICH_STYLE_YELLOW,
-    RICH_STYLE_GREEN_BOLD,
     RICH_STYLE_YELLOW_BOLD,
     PROMPT_CHAR,
 )
diff --git a/AgentCrew/modules/custom_llm/deepinfra_service.py b/AgentCrew/modules/custom_llm/deepinfra_service.py
@@ -45,19 +45,32 @@ def _process_stream_chunk(
                 thinking_data
             )
         """
-        chunk_text = None
+        chunk_text = ""
         input_tokens = 0
         output_tokens = 0
         thinking_content = None  # OpenAI doesn't support thinking mode
 
+        if (not chunk.choices) or (len(chunk.choices) == 0):
+            return (
+                assistant_response or " ",
+                tool_uses,
+                input_tokens,
+                output_tokens,
+                "",
+                (thinking_content, None) if thinking_content else None,
+            )
+
+        delta_chunk = chunk.choices[0].delta
         # Handle regular content chunks
+        #
         if (
-            chunk.choices
-            and len(chunk.choices) > 0
-            and hasattr(chunk.choices[0].delta, "content")
-            and chunk.choices[0].delta.content is not None
+            hasattr(delta_chunk, "reasoning_content")
+            and delta_chunk.reasoning_content is not None
         ):
-            chunk_text = chunk.choices[0].delta.content
+            thinking_content = delta_chunk.reasoning_content
+
+        if hasattr(delta_chunk, "content") and delta_chunk.content is not None:
+            chunk_text = delta_chunk.content
             if "<think>" in chunk_text:
                 self._is_thinking = True
 
@@ -87,11 +100,7 @@ def _process_stream_chunk(
                 output_tokens = chunk.usage.completion_tokens
 
         # Handle tool call chunks
-        if (
-            chunk.choices
-            and len(chunk.choices) > 0
-            and hasattr(chunk.choices[0].delta, "tool_calls")
-        ):
+        if hasattr(delta_chunk, "tool_calls"):
             delta_tool_calls = chunk.choices[0].delta.tool_calls
             if delta_tool_calls:
                 # Process each tool call in the delta
@@ -162,14 +171,6 @@ def _process_stream_chunk(
                             except json.JSONDecodeError:
                                 # Arguments JSON is still incomplete, keep accumulating
                                 pass
-                return (
-                    assistant_response or " ",
-                    tool_uses,
-                    input_tokens,
-                    output_tokens,
-                    "",
-                    (thinking_content, None) if thinking_content else None,
-                )
 
         return (
             assistant_response or " ",
diff --git a/AgentCrew/modules/custom_llm/github_copilot_service.py b/AgentCrew/modules/custom_llm/github_copilot_service.py
@@ -79,13 +79,6 @@ def _convert_internal_format(self, messages: List[Dict[str, Any]]):
         thinking_block = None
         for i, msg in enumerate(messages):
             msg.pop("agent", None)
-            if "tool_calls" in msg and msg.get("tool_calls", []):
-                for tool_call in msg["tool_calls"]:
-                    tool_call["function"] = {}
-                    tool_call["function"]["name"] = tool_call.pop("name", "")
-                    tool_call["function"]["arguments"] = json.dumps(
-                        tool_call.pop("arguments", {})
-                    )
             if msg.get("role") == "assistant":
                 if thinking_block:
                     msg["reasoning_text"] = thinking_block.get("thinking", "")
@@ -103,6 +96,14 @@ def _convert_internal_format(self, messages: List[Dict[str, Any]]):
                     )
                     msg["content"] = []
 
+            if "tool_calls" in msg and msg.get("tool_calls", []):
+                for tool_call in msg["tool_calls"]:
+                    tool_call["function"] = {}
+                    tool_call["function"]["name"] = tool_call.pop("name", "")
+                    tool_call["function"]["arguments"] = json.dumps(
+                        tool_call.pop("arguments", {})
+                    )
+
             if msg.get("role") == "tool":
                 # Special treatment for GitHub Copilot GPT-4.1 model
                 # At the time of writing, the GitHub Copilot GPT-4.1 model cannot read tool results with array content
@@ -174,29 +175,32 @@ def _process_stream_chunk(
         thinking_content = None  # OpenAI doesn't support thinking mode
         thinking_signature = None
 
+        if (not chunk.choices) or (len(chunk.choices) == 0):
+            return (
+                assistant_response or " ",
+                tool_uses,
+                input_tokens,
+                output_tokens,
+                "",
+                (thinking_content, None) if thinking_content else None,
+            )
+
+        delta_chunk = chunk.choices[0].delta
+
         # Handle thinking content
         if (
-            chunk.choices
-            and len(chunk.choices) > 0
-            and hasattr(chunk.choices[0].delta, "reasoning_text")
-            and chunk.choices[0].delta.reasoning_text is not None
+            hasattr(delta_chunk, "reasoning_text")
+            and delta_chunk.reasoning_text is not None
         ):
-            thinking_content = chunk.choices[0].delta.reasoning_text
+            thinking_content = delta_chunk.reasoning_text
 
         if (
-            chunk.choices
-            and len(chunk.choices) > 0
-            and hasattr(chunk.choices[0].delta, "reasoning_opaque")
-            and chunk.choices[0].delta.reasoning_opaque is not None
+            hasattr(delta_chunk, "reasoning_opaque")
+            and delta_chunk.reasoning_opaque is not None
         ):
-            thinking_signature = chunk.choices[0].delta.reasoning_opaque
+            thinking_signature = delta_chunk.reasoning_opaque
         # Handle regular content chunks
-        if (
-            chunk.choices
-            and len(chunk.choices) > 0
-            and hasattr(chunk.choices[0].delta, "content")
-            and chunk.choices[0].delta.content is not None
-        ):
+        if hasattr(delta_chunk, "content") and delta_chunk.content is not None:
             chunk_text = chunk.choices[0].delta.content
             assistant_response += chunk_text
 
@@ -208,11 +212,7 @@ def _process_stream_chunk(
                 output_tokens = chunk.usage.completion_tokens
 
         # Handle tool call chunks
-        if (
-            chunk.choices
-            and len(chunk.choices) > 0
-            and hasattr(chunk.choices[0].delta, "tool_calls")
-        ):
+        if hasattr(delta_chunk, "tool_calls"):
             delta_tool_calls = chunk.choices[0].delta.tool_calls
             if delta_tool_calls:
                 # Process each tool call in the delta
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "agentcrew-ai"
-version = "0.8.4"
+version = "0.8.5"
 requires-python = ">=3.12"
 classifiers = [
     "Programming Language :: Python :: 3",
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "0.8.4"`
	`1`	`+__version__ = "0.8.5"`
Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,6 @@`
`20`	`20`	`RICH_STYLE_GREEN,`
`21`	`21`	`RICH_STYLE_BLUE,`
`22`	`22`	`RICH_STYLE_YELLOW,`
`23`		`- RICH_STYLE_GREEN_BOLD,`
`24`	`23`	`RICH_STYLE_YELLOW_BOLD,`
`25`	`24`	`PROMPT_CHAR,`
`26`	`25`	`)`