@@ -680,6 +680,7 @@ def get_response(
        max_iterations = 10  # Prevent infinite loops
        iteration_count = 0
        final_response_text = ""
+       stored_reasoning_content = None  # Store reasoning content from tool execution

        while iteration_count < max_iterations:
            try:
@@ -922,6 +923,30 @@ def get_response(
                        else:
                            logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")

+                   # Handle reasoning_steps after tool execution if not already handled by Ollama
+                   if reasoning_steps and not ollama_handled:
+                       # Make a non-streaming call to capture reasoning content
+                       reasoning_resp = litellm.completion(
+                           **self._build_completion_params(
+                               messages=messages,
+                               temperature=temperature,
+                               stream=False,  # force non-streaming
+                               **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
+                           )
+                       )
+                       reasoning_content = reasoning_resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                       response_text = reasoning_resp["choices"][0]["message"]["content"]
+
+                       # Store reasoning content for later use
+                       if reasoning_content:
+                           stored_reasoning_content = reasoning_content
+
+                       # Update messages with the response
+                       messages.append({
+                           "role": "assistant",
+                           "content": response_text
+                       })
+
                    # After tool execution, continue the loop to check if more tools are needed
                    # instead of immediately trying to get a final response
                    iteration_count += 1
@@ -943,16 +968,30 @@ def get_response(
                # No tool calls were made in this iteration, return the response
                if verbose:
-                   display_interaction(
-                       original_prompt,
-                       response_text,
-                       markdown=markdown,
-                       generation_time=time.time() - start_time,
-                       console=console
-                   )
+                   # If we have stored reasoning content from tool execution, display it
+                   if stored_reasoning_content:
+                       display_interaction(
+                           original_prompt,
+                           f"Reasoning:\n{stored_reasoning_content}\n\nAnswer:\n{response_text}",
+                           markdown=markdown,
+                           generation_time=time.time() - start_time,
+                           console=console
+                       )
+                   else:
+                       display_interaction(
+                           original_prompt,
+                           response_text,
+                           markdown=markdown,
+                           generation_time=time.time() - start_time,
+                           console=console
+                       )

                response_text = response_text.strip()

+               # Return reasoning content if reasoning_steps is True and we have it
+               if reasoning_steps and stored_reasoning_content:
+                   return stored_reasoning_content
+
                # Handle output formatting
                if output_json or output_pydantic:
                    self.chat_history.append({"role": "user", "content": original_prompt})
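
Net effect of this hunk: when reasoning_steps is true and a chain-of-thought was captured during the tool round-trip, get_response now returns that reasoning text instead of the final answer. A hedged usage sketch follows; the LLM class is the one this diff modifies, but the constructor argument, prompt, and toy tool are assumptions for illustration:

def get_weather(city: str) -> str:
    """Toy tool so the call path goes through tool execution."""
    return f"Sunny in {city}"

llm = LLM(model="deepseek/deepseek-reasoner")  # illustrative model
out = llm.get_response(
    "What's the weather in Paris?",
    tools=[get_weather],
    reasoning_steps=True,
)
# out is the stored reasoning text when the provider exposed it during the
# tool round-trip, otherwise the ordinary final answer.
print(out)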
@@ -1161,12 +1200,7 @@ async def get_response_async(
        stream: bool = True,
        **kwargs
    ) -> str:
-       """Async version of get_response with identical functionality.
-
-       NOTE: This async version currently does NOT support sequential tool calling
-       like the sync version does. It will return after the first tool execution.
-       This is a known limitation that needs to be addressed in a future update.
-       """
+       """Async version of get_response with identical functionality."""
        try:
            import litellm
            logging.info(f"Getting async response from {self.model}")
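
Dropping the NOTE implies the async path now mirrors the sync one, including sequential tool calling. A minimal asyncio sketch under the same illustrative assumptions as the sync example above:

import asyncio

async def main() -> None:
    llm = LLM(model="deepseek/deepseek-reasoner")  # illustrative model
    answer = await llm.get_response_async(
        "What's the weather in Paris?",
        tools=[get_weather],
        stream=True,  # default, per the signature shown in this hunk
    )
    print(answer)

asyncio.run(main())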
@@ -1238,46 +1272,47 @@ async def get_response_async(

            response_text = ""
            if reasoning_steps:
-                   # Non-streaming call to capture reasoning
-                   resp = await litellm.acompletion(
-                       **self._build_completion_params(
-                           messages=messages,
-                           temperature=temperature,
-                           stream=False,  # force non-streaming
-                           **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
-                       )
+               # Non-streaming call to capture reasoning
+               resp = await litellm.acompletion(
+                   **self._build_completion_params(
+                       messages=messages,
+                       temperature=temperature,
+                       stream=False,  # force non-streaming
+                       **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
                    )
-                   reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
-                   response_text = resp["choices"][0]["message"]["content"]
-
-                   if verbose and reasoning_content:
-                       display_interaction(
-                           "Initial reasoning:",
-                           f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
-                           markdown=markdown,
-                           generation_time=time.time() - start_time,
-                           console=console
-                       )
-                   elif verbose:
-                       display_interaction(
-                           "Initial response:",
-                           response_text,
-                           markdown=markdown,
-                           generation_time=time.time() - start_time,
-                           console=console
-                       )
-               else:
-                   # Determine if we should use streaming based on tool support
-                   use_streaming = stream
-                   if formatted_tools and not self._supports_streaming_tools():
-                       # Provider doesn't support streaming with tools, use non-streaming
-                       use_streaming = False
+               )
+               reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+               response_text = resp["choices"][0]["message"]["content"]
+
+               if verbose and reasoning_content:
+                   display_interaction(
+                       "Initial reasoning:",
+                       f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                       markdown=markdown,
+                       generation_time=time.time() - start_time,
+                       console=console
+                   )
+               elif verbose:
+                   display_interaction(
+                       "Initial response:",
+                       response_text,
+                       markdown=markdown,
+                       generation_time=time.time() - start_time,
+                       console=console
+                   )
+           else:
+               # Determine if we should use streaming based on tool support
+               use_streaming = stream
+               if formatted_tools and not self._supports_streaming_tools():
+                   # Provider doesn't support streaming with tools, use non-streaming
+                   use_streaming = False
+
+               if use_streaming:
+                   # Streaming approach (with or without tools)
+                   tool_calls = []

-               if use_streaming:
-                   # Streaming approach (with or without tools)
-
-                   if verbose:
-                       async for chunk in await litellm.acompletion(
+                   if verbose:
+                       async for chunk in await litellm.acompletion(
                            **self._build_completion_params(
                                messages=messages,
                                temperature=temperature,
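
The re-indented else branch gates streaming on tool support. The decision itself is a small pure function, sketched here with supports_streaming_tools standing in for the internal self._supports_streaming_tools() helper (its exact behavior is an assumption):

def choose_streaming(stream: bool, formatted_tools, supports_streaming_tools: bool) -> bool:
    # Providers that cannot stream tool calls must fall back to one-shot completion.
    if formatted_tools and not supports_streaming_tools:
        return False
    return stream

assert choose_streaming(True, [{"type": "function"}], False) is False
assert choose_streaming(True, None, False) is True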
@@ -1433,7 +1468,67 @@ async def get_response_async(
                        else:
                            logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")

-                   # Get response after tool calls
+                   # If no special handling was needed or if it's not an Ollama model
+                   if reasoning_steps and not ollama_handled:
+                       # Non-streaming call to capture reasoning
+                       resp = await litellm.acompletion(
+                           **self._build_completion_params(
+                               messages=messages,
+                               temperature=temperature,
+                               stream=False,  # force non-streaming
+                               tools=formatted_tools,  # Include tools
+                               **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
+                           )
+                       )
+                       reasoning_content = resp["choices"][0]["message"].get("provider_specific_fields", {}).get("reasoning_content")
+                       response_text = resp["choices"][0]["message"]["content"]
+
+                       if verbose and reasoning_content:
+                           display_interaction(
+                               "Tool response reasoning:",
+                               f"Reasoning:\n{reasoning_content}\n\nAnswer:\n{response_text}",
+                               markdown=markdown,
+                               generation_time=time.time() - start_time,
+                               console=console
+                           )
+                       elif verbose:
+                           display_interaction(
+                               "Tool response:",
+                               response_text,
+                               markdown=markdown,
+                               generation_time=time.time() - start_time,
+                               console=console
+                           )
+                   elif not ollama_handled:
+                       # Get response after tool calls with streaming if not already handled
+                       if verbose:
+                           async for chunk in await litellm.acompletion(
+                               **self._build_completion_params(
+                                   messages=messages,
+                                   temperature=temperature,
+                                   stream=stream,
+                                   tools=formatted_tools,
+                                   **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
+                               )
+                           ):
+                               if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                   content = chunk.choices[0].delta.content
+                                   response_text += content
+                                   print("\033[K", end="\r")
+                                   print(f"Reflecting... {time.time() - start_time:.1f}s", end="\r")
+                       else:
+                           response_text = ""
+                           async for chunk in await litellm.acompletion(
+                               **self._build_completion_params(
+                                   messages=messages,
+                                   temperature=temperature,
+                                   stream=stream,
+                                   **{k: v for k, v in kwargs.items() if k != 'reasoning_steps'}
+                               )
+                           ):
+                               if chunk and chunk.choices and chunk.choices[0].delta.content:
+                                   response_text += chunk.choices[0].delta.content
+
                    response_text = response_text.strip()

                    # Handle output formatting
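
Both post-tool paths consume the model output with the same async for chunk in await litellm.acompletion(..., stream=True) idiom. A self-contained version of that loop; the model name is an assumption, while the chunk shape (choices[0].delta.content) is exactly what the hunk iterates over:

import asyncio
import litellm

async def stream_answer() -> str:
    text = ""
    async for chunk in await litellm.acompletion(
        model="gpt-4o-mini",  # illustrative model
        messages=[{"role": "user", "content": "Say hello."}],
        stream=True,
    ):
        # Deltas may be empty (e.g. role-only chunks), so guard before appending.
        if chunk and chunk.choices and chunk.choices[0].delta.content:
            text += chunk.choices[0].delta.content
    return text

print(asyncio.run(stream_answer()))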