@@ -549,6 +549,7 @@ def get_response(
                 })
 
                 should_continue = False
+                tool_results = []  # Store all tool results
                 for tool_call in tool_calls:
                     # Handle both object and dict access patterns
                     if isinstance(tool_call, dict):
@@ -569,6 +570,7 @@ def get_response(
                     logging.debug(f"[TOOL_EXEC_DEBUG] About to execute tool {function_name} with args: {arguments}")
                     tool_result = execute_tool_fn(function_name, arguments)
                     logging.debug(f"[TOOL_EXEC_DEBUG] Tool execution result: {tool_result}")
+                    tool_results.append(tool_result)  # Store the result
 
                     if verbose:
                         display_message = f"Agent {agent_name} called function '{function_name}' with arguments: {arguments}\n"
@@ -601,7 +603,8 @@ def get_response(
                 # If we reach here, no more tool calls needed - get final response
                 # Make one more call to get the final summary response
                 # Special handling for Ollama models that don't automatically process tool results
-                if self.model and self.model.startswith("ollama/") and tool_result:
+                ollama_handled = False
+                if self.model and self.model.startswith("ollama/") and tool_results:
                     # For Ollama models, we need to explicitly ask the model to process the tool results
                     # First, check if the response is just a JSON tool call
                     try:
@@ -614,13 +617,30 @@ def get_response(
                         # Create a prompt that asks the model to process the tool results based on original context
                         # Extract the original user query from messages
                         original_query = ""
-                        for msg in messages:
+                        for msg in reversed(messages):  # Look from the end to find the most recent user message
                             if msg.get("role") == "user":
-                                original_query = msg.get("content", "")
-                                break
+                                content = msg.get("content", "")
+                                # Handle list content (multimodal)
+                                if isinstance(content, list):
+                                    for item in content:
+                                        if isinstance(item, dict) and item.get("type") == "text":
+                                            original_query = item.get("text", "")
+                                            break
+                                else:
+                                    original_query = content
+                                if original_query:
+                                    break
+
+                        # Create a shorter follow-up prompt with all tool results
+                        # If there's only one result, use it directly; otherwise combine them
+                        if len(tool_results) == 1:
+                            results_text = json.dumps(tool_results[0], indent=2)
+                        else:
+                            results_text = json.dumps(tool_results, indent=2)
 
-                        # Create a shorter follow-up prompt
-                        follow_up_prompt = f"Results:\n{json.dumps(tool_result, indent=2)}\nProvide Answer to this Original Question based on the above results: '{original_query}'"
+                        follow_up_prompt = f"Results:\n{results_text}\nProvide Answer to this Original Question based on the above results: '{original_query}'"
+                        logging.debug(f"[OLLAMA_DEBUG] Original query extracted: {original_query}")
+                        logging.debug(f"[OLLAMA_DEBUG] Follow-up prompt: {follow_up_prompt[:200]}...")
 
                         # Make a follow-up call to process the results
                         follow_up_messages = [
@@ -653,12 +673,33 @@ def get_response(
                             ):
                                 if chunk and chunk.choices and chunk.choices[0].delta.content:
                                     response_text += chunk.choices[0].delta.content
+
+                        # Set flag to indicate Ollama was handled
+                        ollama_handled = True
+                        final_response_text = response_text.strip()
+                        logging.debug(f"[OLLAMA_DEBUG] Ollama follow-up response: {final_response_text[:200]}...")
+
+                        # Display the response if we got one
+                        if final_response_text and verbose:
+                            display_interaction(
+                                original_prompt,
+                                final_response_text,
+                                markdown=markdown,
+                                generation_time=time.time() - start_time,
+                                console=console
+                            )
+
+                        # Return the final response after processing Ollama's follow-up
+                        if final_response_text:
+                            return final_response_text
+                        else:
+                            logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
                     except (json.JSONDecodeError, KeyError):
                         # Not a JSON response or not a tool call format, continue normally
                         pass
 
-                # If reasoning_steps is True, do a single non-streaming call
-                elif reasoning_steps:
+                # If reasoning_steps is True and we haven't handled Ollama already, do a single non-streaming call
+                if reasoning_steps and not ollama_handled:
                     resp = litellm.completion(
                         **self._build_completion_params(
                             messages=messages,
@@ -688,8 +729,8 @@ def get_response(
                             console=console
                         )
 
-                # Otherwise do the existing streaming approach
-                else:
+                # Otherwise do the existing streaming approach if not already handled
+                elif not ollama_handled:
                     # Get response after tool calls with streaming
                     if verbose:
                         with Live(display_generating("", current_time), console=console, refresh_per_second=4) as live:
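For reference, here is a minimal, self-contained sketch of the query-extraction and prompt-building logic that the hunks above add to the synchronous path (the same logic is mirrored in `get_response_async` below). The helper names are hypothetical; the patch inlines this logic rather than defining these functions.

```python
# Illustrative sketch (not part of the patch): standalone versions of the new
# "most recent user message" extraction and follow-up prompt construction.
import json
from typing import Any, Dict, List


def extract_latest_user_query(messages: List[Dict[str, Any]]) -> str:
    """Walk messages newest-first and return the text of the last user message,
    handling multimodal list content of the form [{"type": "text", "text": ...}]."""
    for msg in reversed(messages):
        if msg.get("role") != "user":
            continue
        content = msg.get("content", "")
        if isinstance(content, list):
            for item in content:
                if isinstance(item, dict) and item.get("type") == "text" and item.get("text"):
                    return item["text"]
        elif content:
            return content
    return ""


def build_follow_up_prompt(messages: List[Dict[str, Any]], tool_results: List[Any]) -> str:
    """Serialize one or many tool results and append the original question,
    matching the prompt format used in the diff."""
    results_text = json.dumps(tool_results[0] if len(tool_results) == 1 else tool_results, indent=2)
    original_query = extract_latest_user_query(messages)
    return (f"Results:\n{results_text}\n"
            f"Provide Answer to this Original Question based on the above results: '{original_query}'")


if __name__ == "__main__":
    msgs = [
        {"role": "system", "content": "You are helpful."},
        {"role": "user", "content": [{"type": "text", "text": "What is the weather in Paris?"}]},
    ]
    print(build_follow_up_prompt(msgs, [{"city": "Paris", "temp_c": 21}]))
```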
@@ -1225,6 +1266,7 @@ async def get_response_async(
                     "tool_calls": serializable_tool_calls
                 })
 
+                tool_results = []  # Store all tool results
                 for tool_call in tool_calls:
                     # Handle both object and dict access patterns
                     if isinstance(tool_call, dict):
@@ -1243,6 +1285,7 @@ async def get_response_async(
                        tool_call_id = f"tool_{id(tool_call)}"
 
                    tool_result = await execute_tool_fn(function_name, arguments)
+                   tool_results.append(tool_result)  # Store the result
 
                    if verbose:
                        display_message = f"Agent {agent_name} called function '{function_name}' with arguments: {arguments}\n"
@@ -1261,7 +1304,8 @@ async def get_response_async(
                response_text = ""
 
                # Special handling for Ollama models that don't automatically process tool results
-               if self._is_ollama_provider() and tool_result:
+               ollama_handled = False
+               if self._is_ollama_provider() and tool_results:
                    # For Ollama models, we need to explicitly ask the model to process the tool results
                    # First, check if the response is just a JSON tool call
                    try:
@@ -1274,13 +1318,30 @@ async def get_response_async(
                        # Create a prompt that asks the model to process the tool results based on original context
                        # Extract the original user query from messages
                        original_query = ""
-                       for msg in messages:
+                       for msg in reversed(messages):  # Look from the end to find the most recent user message
                            if msg.get("role") == "user":
-                               original_query = msg.get("content", "")
-                               break
+                               content = msg.get("content", "")
+                               # Handle list content (multimodal)
+                               if isinstance(content, list):
+                                   for item in content:
+                                       if isinstance(item, dict) and item.get("type") == "text":
+                                           original_query = item.get("text", "")
+                                           break
+                               else:
+                                   original_query = content
+                               if original_query:
+                                   break
+
+                       # Create a shorter follow-up prompt with all tool results
+                       # If there's only one result, use it directly; otherwise combine them
+                       if len(tool_results) == 1:
+                           results_text = json.dumps(tool_results[0], indent=2)
+                       else:
+                           results_text = json.dumps(tool_results, indent=2)
 
-                       # Create a shorter follow-up prompt
-                       follow_up_prompt = f"Results:\n{json.dumps(tool_result, indent=2)}\nProvide Answer to this Original Question based on the above results: '{original_query}'"
+                       follow_up_prompt = f"Results:\n{results_text}\nProvide Answer to this Original Question based on the above results: '{original_query}'"
+                       logging.debug(f"[OLLAMA_DEBUG] Original query extracted: {original_query}")
+                       logging.debug(f"[OLLAMA_DEBUG] Follow-up prompt: {follow_up_prompt[:200]}...")
 
                        # Make a follow-up call to process the results
                        follow_up_messages = [
@@ -1313,12 +1374,33 @@ async def get_response_async(
                            ):
                                if chunk and chunk.choices and chunk.choices[0].delta.content:
                                    response_text += chunk.choices[0].delta.content
+
+                       # Set flag to indicate Ollama was handled
+                       ollama_handled = True
+                       final_response_text = response_text.strip()
+                       logging.debug(f"[OLLAMA_DEBUG] Ollama follow-up response: {final_response_text[:200]}...")
+
+                       # Display the response if we got one
+                       if final_response_text and verbose:
+                           display_interaction(
+                               original_prompt,
+                               final_response_text,
+                               markdown=markdown,
+                               generation_time=time.time() - start_time,
+                               console=console
+                           )
+
+                       # Return the final response after processing Ollama's follow-up
+                       if final_response_text:
+                           return final_response_text
+                       else:
+                           logging.warning("[OLLAMA_DEBUG] Ollama follow-up returned empty response")
                    except (json.JSONDecodeError, KeyError):
                        # Not a JSON response or not a tool call format, continue normally
                        pass
 
                # If no special handling was needed or if it's not an Ollama model
-               elif reasoning_steps:
+               if reasoning_steps and not ollama_handled:
                    # Non-streaming call to capture reasoning
                    resp = await litellm.acompletion(
                        **self._build_completion_params(
@@ -1348,8 +1430,8 @@ async def get_response_async(
                            generation_time=time.time() - start_time,
                            console=console
                        )
-               else:
-                   # Get response after tool calls with streaming
+               elif not ollama_handled:
+                   # Get response after tool calls with streaming if not already handled
                    if verbose:
                        async for chunk in await litellm.acompletion(
                            **self._build_completion_params(
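Taken together, the async hunks mirror the synchronous ones: aggregate every tool result, let Ollama-backed models process those results in an explicit follow-up call, and use an `ollama_handled` flag so the reasoning-steps and streaming fallbacks are skipped once that follow-up has run. Below is a minimal, self-contained sketch of that control flow; the function and parameter names are illustrative, not part of the patch, and the real code additionally guards the Ollama branch with a try/except that only proceeds when the first reply was a bare JSON tool call.

```python
# Control-flow sketch only (hypothetical names): shows how `ollama_handled`
# prevents the later branches from running once the follow-up call has answered.
from typing import Any, Callable, List


def finalize_after_tools(
    is_ollama: bool,
    tool_results: List[Any],
    reasoning_steps: bool,
    ollama_follow_up: Callable[[List[Any]], str],
    reasoning_call: Callable[[], str],
    streaming_call: Callable[[], str],
) -> str:
    ollama_handled = False

    if is_ollama and tool_results:
        # In the patch this block sits inside a try/except around json.loads(response_text).
        ollama_handled = True
        final_response_text = ollama_follow_up(tool_results).strip()
        if final_response_text:
            return final_response_text  # early return, as in the patch
        # Empty follow-up: the patch logs a warning and falls through,
        # but the branches below are still skipped.

    if reasoning_steps and not ollama_handled:
        return reasoning_call()   # single non-streaming call to capture reasoning
    elif not ollama_handled:
        return streaming_call()   # existing streaming approach
    return ""


# The Ollama follow-up wins; the streaming fallback never runs.
print(finalize_after_tools(
    is_ollama=True,
    tool_results=[{"temp_c": 21}],
    reasoning_steps=False,
    ollama_follow_up=lambda results: "It is 21 °C in Paris.",
    reasoning_call=lambda: "(reasoning)",
    streaming_call=lambda: "(streamed)",
))
```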