diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py
index bfeb33ca..d611621a 100644
--- a/src/agent/custom_agent.py
+++ b/src/agent/custom_agent.py
@@ -220,6 +220,7 @@ def update_step_info(
logger.info(f"🧠 All Memory: \n{step_info.memory}")
+
@time_execution_async("--get_next_action")
async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutput:
"""Get next action from LLM based on current state"""
@@ -232,19 +233,80 @@ async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutpu
logger.info(ai_message.reasoning_content)
logger.info("🤯 End Deep Thinking")
- if isinstance(ai_message.content, list):
- ai_content = ai_message.content[0]
- else:
- ai_content = ai_message.content
-
- ai_content = ai_content.replace("```json", "").replace("```", "")
- ai_content = repair_json(ai_content)
- parsed_json = json.loads(ai_content)
- parsed: AgentOutput = self.AgentOutput(**parsed_json)
-
- if parsed is None:
- logger.debug(ai_message.content)
- raise ValueError('Could not parse response.')
+ try:
+ if isinstance(ai_message.content, list):
+ ai_content = ai_message.content[0]
+ else:
+ ai_content = ai_message.content
+
+            # Log the raw model output to aid debugging of malformed responses
+            logger.debug(f"Raw AI content: {ai_content}")
+
+ # Enhanced JSON parsing
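+            # NOTE: the regex-based cleanup below assumes `re` is imported at the top of this module; add the import if it is missing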
+ if "```json" in ai_content or "```" in ai_content:
+ # Extract JSON from code blocks
+ ai_content = re.sub(r'```(?:json)?(.*?)```', r'\1', ai_content, flags=re.DOTALL)
+
+ # Try to repair the JSON
+ try:
+ ai_content = repair_json(ai_content)
+ except Exception as json_repair_error:
+ logger.warning(f"JSON repair failed: {json_repair_error}")
+ # Try more aggressive cleaning
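+                    # The character class below keeps only JSON-legal characters:
+                    # braces, brackets, commas, colons, quotes, word characters, whitespace, dots and hyphens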
+ ai_content = re.sub(r'[^{}[\],:"\d\w\s.-]', '', ai_content)
+
+ try:
+ parsed_json = json.loads(ai_content)
+ if 'action' in parsed_json:
+ for action in parsed_json['action']:
+ if isinstance(action, dict) and 'done' in action and isinstance(action['done'], dict) and 'text' in action['done']:
+ # If text is a dict with type/value structure, extract the value
+ if isinstance(action['done']['text'], dict) and 'value' in action['done']['text']:
+ action['done']['text'] = action['done']['text']['value']
+ # If text is any other non-string dict, convert to string
+ elif isinstance(action['done']['text'], dict):
+ action['done']['text'] = str(action['done']['text'])
+ parsed: AgentOutput = self.AgentOutput(**parsed_json)
+ except json.JSONDecodeError as e:
+ # Create a minimal valid structure if parsing fails
+                logger.warning(f"JSON parsing failed ({e}), creating minimal structure")
+ parsed_json = {
+ "current_state": {
+ "prev_action_evaluation": "Failed - Unable to parse model output",
+ "important_contents": "",
+ "task_progress": "",
+ "future_plans": "Retry with simpler action",
+ "thought": "The model output was malformed. I need to retry with a simpler action.",
+ "summary": "Retrying with simpler action"
+ },
+ "action": [{"extract_page_content": {}}] # Safe fallback action
+ }
+
+ parsed: AgentOutput = self.AgentOutput(**parsed_json)
+ except Exception as e:
+ logger.error(f"Error processing model output: {e}")
+ # Create a minimal fallback output
+ minimal_json = {
+ "current_state": {
+ "prev_action_evaluation": "Failed - Unable to process model output",
+ "important_contents": "",
+ "task_progress": "",
+ "future_plans": "Retry with simpler action",
+ "thought": "There was an error processing the model output. I'll take a safe action.",
+ "summary": "Handling error gracefully"
+ },
+ "action": [{"extract_page_content": {}}] # Safe fallback action
+ }
+ parsed = self.AgentOutput(**minimal_json)
+
+            # Post-process the parsed actions before execution
+ if len(parsed.action) > 0:
+ first_action = parsed.action[0]
+                if getattr(first_action, 'go_to_url', None) is not None:
+                    logger.info("Navigation action detected - executing in isolation")
+                    parsed.action = [first_action]
+                elif getattr(first_action, 'done', None) is not None:
+                    parsed.action = [first_action]
# Limit actions to maximum allowed per step
parsed.action = parsed.action[: self.max_actions_per_step]
@@ -310,6 +372,9 @@ async def _run_planner(self) -> Optional[str]:
@time_execution_async("--step")
async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
"""Execute one step of the task"""
+ if not self.browser_context:
+ raise RuntimeError("Browser context not initialized")
+
logger.info(f"\n📍 Step {self.n_steps}")
state = None
model_output = None
@@ -352,14 +417,17 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
check_break_if_paused=lambda: self._check_if_stopped_or_paused(),
available_file_paths=self.available_file_paths,
)
- if len(result) != len(actions):
- # I think something changes, such information should let LLM know
+ if len(result) != len(actions) and len(actions) > 0:
+                # Index of the last successfully executed action (guarding against an empty result list)
+ base_action_index = len(result) - 1 if len(result) > 0 else 0
for ri in range(len(result), len(actions)):
+                    error_msg = f"{actions[ri].model_dump_json(exclude_unset=True)} failed to execute."
+ if len(result) > 0:
+ error_msg += f" Something new appeared after action {actions[base_action_index].model_dump_json(exclude_unset=True)}"
result.append(ActionResult(extracted_content=None,
- include_in_memory=True,
- error=f"{actions[ri].model_dump_json(exclude_unset=True)} is Failed to execute. \
- Something new appeared after action {actions[len(result) - 1].model_dump_json(exclude_unset=True)}",
- is_done=False))
+ include_in_memory=True,
+ error=error_msg,
+ is_done=False))
for ret_ in result:
if ret_.extracted_content and "Extracted page" in ret_.extracted_content:
# record every extracted page
diff --git a/src/agent/custom_views.py b/src/agent/custom_views.py
index d0dfb061..7496a471 100644
--- a/src/agent/custom_views.py
+++ b/src/agent/custom_views.py
@@ -25,7 +25,7 @@ class CustomAgentBrain(BaseModel):
task_progress: str
future_plans: str
thought: str
- summary: str
+    summary: str = ""
class CustomAgentOutput(AgentOutput):
diff --git a/src/utils/default_config_settings.py b/src/utils/default_config_settings.py
index e6fa88f9..5ddc27b1 100644
--- a/src/utils/default_config_settings.py
+++ b/src/utils/default_config_settings.py
@@ -14,7 +14,7 @@ def default_config():
"tool_calling_method": "auto",
"llm_provider": "openai",
"llm_model_name": "gpt-4o",
- "llm_num_ctx": 32000,
+ "llm_num_ctx": 16000,
"llm_temperature": 1.0,
"llm_base_url": "",
"llm_api_key": "",
diff --git a/src/utils/llm.py b/src/utils/llm.py
index 2ea332e1..34519881 100644
--- a/src/utils/llm.py
+++ b/src/utils/llm.py
@@ -1,5 +1,6 @@
from openai import OpenAI
-import pdb
+import logging
+import traceback
from langchain_openai import ChatOpenAI
from langchain_core.globals import get_llm_cache
from langchain_core.language_models.base import (
@@ -40,14 +41,87 @@
cast,
)
-class DeepSeekR1ChatOpenAI(ChatOpenAI):
+logger = logging.getLogger(__name__)
+
+class ModelResponseProcessor:
+ """Utility class for extracting and processing model responses."""
+
+ @staticmethod
+ def extract_reasoning_content(content: str) -> tuple[str, str]:
+ """Extract reasoning content from various formats."""
+ reasoning_content = ""
+ processed_content = content
+
+ # Try different formats
+        if "<think>" in content and "</think>" in content:
+            # DeepSeek format with <think>...</think> reasoning tags
+            parts = content.split("</think>", 1)
+            reasoning_content = parts[0].replace("<think>", "").strip()
+ processed_content = parts[1].strip() if len(parts) > 1 else content
+ elif "Reasoning:" in content and "Action:" in content:
+ # Format with explicit Reasoning/Action sections
+ parts = content.split("Action:", 1)
+ reasoning_content = parts[0].replace("Reasoning:", "").strip()
+ processed_content = parts[1].strip() if len(parts) > 1 else content
+
+ return reasoning_content, processed_content
+
+ @staticmethod
+ def extract_json_content(content: str) -> str:
+ """Extract JSON content from various formats."""
+ processed_content = content
+
+ # Try JSON code blocks
+ if "```json" in content and "```" in content:
+ try:
+ json_parts = content.split("```json", 1)
+ if len(json_parts) > 1:
+ code_parts = json_parts[1].split("```", 1)
+ if code_parts:
+ processed_content = code_parts[0].strip()
+ except Exception:
+ pass
+
+ # Try JSON Response marker
+ elif "**JSON Response:**" in content:
+ try:
+ json_parts = content.split("**JSON Response:**", 1)
+ if len(json_parts) > 1:
+ processed_content = json_parts[1].strip()
+ except Exception:
+ pass
+
+ return processed_content
- def __init__(self, *args: Any, **kwargs: Any) -> None:
+ @staticmethod
+ def process_response(response: AIMessage) -> AIMessage:
+ """Process a response to extract reasoning and content."""
+ try:
+ if not hasattr(response, "content") or not response.content:
+ return AIMessage(content="", reasoning_content="")
+
+ content = response.content
+
+ # Extract reasoning content
+ reasoning_content, processed_content = ModelResponseProcessor.extract_reasoning_content(content)
+
+ # Extract JSON content if present
+ processed_content = ModelResponseProcessor.extract_json_content(processed_content)
+
+ return AIMessage(content=processed_content, reasoning_content=reasoning_content)
+
+ except Exception as e:
+ logger.error(f"Error processing response: {e}")
+ # Return original message if processing fails
+ return response
+
+
+class EnhancedChatOpenAI(ChatOpenAI):
+ """Enhanced ChatOpenAI that handles reasoning extraction."""
+
+    # Declared as a pydantic field so it can be set through the constructor
+    extract_reasoning: bool = False
+
+    def __init__(self, *args, **kwargs):
+        # extract_reasoning is handled by the pydantic field above via super().__init__
super().__init__(*args, **kwargs)
- self.client = OpenAI(
- base_url=kwargs.get("base_url"),
- api_key=kwargs.get("api_key")
- )
async def ainvoke(
self,
@@ -57,24 +131,18 @@ async def ainvoke(
stop: Optional[list[str]] = None,
**kwargs: Any,
) -> AIMessage:
- message_history = []
- for input_ in input:
- if isinstance(input_, SystemMessage):
- message_history.append({"role": "system", "content": input_.content})
- elif isinstance(input_, AIMessage):
- message_history.append({"role": "assistant", "content": input_.content})
- else:
- message_history.append({"role": "user", "content": input_.content})
-
- response = self.client.chat.completions.create(
- model=self.model_name,
- messages=message_history
- )
-
- reasoning_content = response.choices[0].message.reasoning_content
- content = response.choices[0].message.content
- return AIMessage(content=content, reasoning_content=reasoning_content)
-
+ try:
+ response = await super().ainvoke(input=input, config=config, stop=stop, **kwargs)
+
+ if self.extract_reasoning:
+ return ModelResponseProcessor.process_response(response)
+ return response
+
+ except Exception as e:
+ logger.error(f"Error in EnhancedChatOpenAI.ainvoke: {e}")
+ # Return a minimal AIMessage
+ return AIMessage(content=f"Error: {str(e)}")
+
def invoke(
self,
input: LanguageModelInput,
@@ -83,26 +151,29 @@ def invoke(
stop: Optional[list[str]] = None,
**kwargs: Any,
) -> AIMessage:
- message_history = []
- for input_ in input:
- if isinstance(input_, SystemMessage):
- message_history.append({"role": "system", "content": input_.content})
- elif isinstance(input_, AIMessage):
- message_history.append({"role": "assistant", "content": input_.content})
- else:
- message_history.append({"role": "user", "content": input_.content})
-
- response = self.client.chat.completions.create(
- model=self.model_name,
- messages=message_history
- )
+ try:
+ response = super().invoke(input=input, config=config, stop=stop, **kwargs)
+
+ if self.extract_reasoning:
+ return ModelResponseProcessor.process_response(response)
+ return response
+
+ except Exception as e:
+ logger.error(f"Error in EnhancedChatOpenAI.invoke: {e}")
+ # Return a minimal AIMessage
+ return AIMessage(content=f"Error: {str(e)}")
+
- reasoning_content = response.choices[0].message.reasoning_content
- content = response.choices[0].message.content
- return AIMessage(content=content, reasoning_content=reasoning_content)
+class EnhancedChatOllama(ChatOllama):
+ """Enhanced ChatOllama that handles reasoning extraction."""
-class DeepSeekR1ChatOllama(ChatOllama):
-
+ extract_reasoning: bool = True
+
+ def __init__(self, *args, **kwargs):
+        # extract_reasoning is a declared pydantic field, so it is set by super().__init__
+        super().__init__(*args, **kwargs)
+
async def ainvoke(
self,
input: LanguageModelInput,
@@ -111,14 +182,18 @@ async def ainvoke(
stop: Optional[list[str]] = None,
**kwargs: Any,
) -> AIMessage:
- org_ai_message = await super().ainvoke(input=input)
- org_content = org_ai_message.content
-        reasoning_content = org_content.split("</think>")[0].replace("<think>", "")
-        content = org_content.split("</think>")[1]
- if "**JSON Response:**" in content:
- content = content.split("**JSON Response:**")[-1]
- return AIMessage(content=content, reasoning_content=reasoning_content)
-
+ try:
+ response = await super().ainvoke(input=input, config=config, stop=stop, **kwargs)
+
+ if self.extract_reasoning:
+ return ModelResponseProcessor.process_response(response)
+ return response
+
+ except Exception as e:
+ logger.error(f"Error in EnhancedChatOllama.ainvoke: {e}\n{traceback.format_exc()}")
+ # Return a minimal AIMessage
+ return AIMessage(content=f"Error: {str(e)}")
+
def invoke(
self,
input: LanguageModelInput,
@@ -127,10 +202,83 @@ def invoke(
stop: Optional[list[str]] = None,
**kwargs: Any,
) -> AIMessage:
- org_ai_message = super().invoke(input=input)
- org_content = org_ai_message.content
-        reasoning_content = org_content.split("</think>")[0].replace("<think>", "")
-        content = org_content.split("</think>")[1]
- if "**JSON Response:**" in content:
- content = content.split("**JSON Response:**")[-1]
- return AIMessage(content=content, reasoning_content=reasoning_content)
\ No newline at end of file
+ try:
+ # Try special API for compatible models
+ if hasattr(self, "client") and hasattr(self.client, "chat") and \
+               any(name in getattr(self, "model", "") for name in ["deepseek-r1", "command-r"]):
+ try:
+ message_history = []
+ for input_ in input:
+ if isinstance(input_, SystemMessage):
+ message_history.append({"role": "system", "content": input_.content})
+ elif isinstance(input_, AIMessage):
+ message_history.append({"role": "assistant", "content": input_.content})
+ else:
+ message_history.append({"role": "user", "content": input_.content})
+
+ api_response = self.client.chat.completions.create(
+                        model=self.model,
+ messages=message_history
+ )
+
+ content = getattr(api_response.choices[0].message, "content", "")
+ reasoning_content = getattr(api_response.choices[0].message, "reasoning_content", "")
+
+ if content and reasoning_content:
+ return AIMessage(content=content, reasoning_content=reasoning_content)
+ except Exception as api_err:
+ logger.warning(f"Special API approach failed, falling back: {api_err}")
+
+ # Standard approach
+ response = super().invoke(input=input, config=config, stop=stop, **kwargs)
+
+ if self.extract_reasoning:
+ return ModelResponseProcessor.process_response(response)
+ return response
+
+ except Exception as e:
+ logger.error(f"Error in EnhancedChatOllama.invoke: {e}\n{traceback.format_exc()}")
+ # Return a minimal AIMessage
+ return AIMessage(content=f"Error: {str(e)}")
+
+
+class DeepSeekR1ChatOpenAI(EnhancedChatOpenAI):
+ """Specialized class for DeepSeek models via OpenAI compatible API."""
+
+ def __init__(self, *args, **kwargs):
+        kwargs.setdefault("extract_reasoning", True)
+        super().__init__(*args, **kwargs)
+ self.client = OpenAI(
+ base_url=kwargs.get("base_url"),
+ api_key=kwargs.get("api_key")
+ )
+
+ async def ainvoke(
+ self,
+ input: LanguageModelInput,
+ config: Optional[RunnableConfig] = None,
+ *,
+ stop: Optional[list[str]] = None,
+ **kwargs: Any,
+ ) -> AIMessage:
+ try:
+ message_history = []
+ for input_ in input:
+ if isinstance(input_, SystemMessage):
+ message_history.append({"role": "system", "content": input_.content})
+ elif isinstance(input_, AIMessage):
+ message_history.append({"role": "assistant", "content": input_.content})
+ else:
+ message_history.append({"role": "user", "content": input_.content})
+
+ response = self.client.chat.completions.create(
+ model=self.model_name,
+ messages=message_history
+ )
+
+ reasoning_content = getattr(response.choices[0].message, "reasoning_content", "")
+ content = getattr(response.choices[0].message, "content", "")
+
+ return AIMessage(content=content, reasoning_content=reasoning_content)
+ except Exception as e:
+ logger.error(f"Error in DeepSeekR1ChatOpenAI.ainvoke: {e}\n{traceback.format_exc()}")
+ return AIMessage(content=f"Error processing DeepSeek model: {str(e)}")
\ No newline at end of file
diff --git a/src/utils/utils.py b/src/utils/utils.py
index b604812b..f4a2ef88 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -2,8 +2,9 @@
import os
import time
from pathlib import Path
-from typing import Dict, Optional
+from typing import Dict, Optional, List
import requests
+from ollama import ListResponse, list as ollama_list, Client
from langchain_anthropic import ChatAnthropic
from langchain_mistralai import ChatMistralAI
@@ -11,8 +12,8 @@
from langchain_ollama import ChatOllama
from langchain_openai import AzureChatOpenAI, ChatOpenAI
import gradio as gr
+from src.utils.llm import EnhancedChatOllama, EnhancedChatOpenAI, DeepSeekR1ChatOpenAI
-from .llm import DeepSeekR1ChatOpenAI, DeepSeekR1ChatOllama
PROVIDER_DISPLAY_NAMES = {
"openai": "OpenAI",
@@ -24,10 +25,38 @@
"moonshot": "MoonShot"
}
+def get_ollama_models() -> List[str]:
+ """
+ Fetch available models from Ollama server
+ Returns a list of model names or empty list if Ollama is not available
+ """
+ try:
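+        # ollama_list() queries the local Ollama server (OLLAMA_HOST, default http://localhost:11434) for installed models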
+        response: ListResponse = ollama_list()
+ return [model.model for model in response.models]
+ except Exception as e:
+ print(f"Warning: Could not fetch Ollama models: {e}")
+ return []
+
+def update_model_names():
+ """
+ Update the model_names dictionary with current Ollama models
+ """
+ global model_names
+ ollama_models = get_ollama_models()
+
+ # Create a copy of the original dictionary
+ updated_models = model_names.copy()
+
+ # Update Ollama models if we successfully fetched them
+ if ollama_models:
+ updated_models["ollama"] = ollama_models
+
+ return updated_models
+
def get_llm_model(provider: str, **kwargs):
"""
- 获取LLM 模型
- :param provider: 模型类型
+ Get LLM model
+ :param provider: model type
:param kwargs:
:return:
"""
@@ -72,7 +101,7 @@ def get_llm_model(provider: str, **kwargs):
else:
base_url = kwargs.get("base_url")
- return ChatOpenAI(
+ return EnhancedChatOpenAI(
model=kwargs.get("model_name", "gpt-4o"),
temperature=kwargs.get("temperature", 0.0),
base_url=base_url,
@@ -92,11 +121,12 @@ def get_llm_model(provider: str, **kwargs):
api_key=api_key,
)
else:
- return ChatOpenAI(
+ return EnhancedChatOpenAI(
model=kwargs.get("model_name", "deepseek-chat"),
temperature=kwargs.get("temperature", 0.0),
base_url=base_url,
api_key=api_key,
+ extract_reasoning=True,
)
elif provider == "google":
return ChatGoogleGenerativeAI(
@@ -109,22 +139,18 @@ def get_llm_model(provider: str, **kwargs):
base_url = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434")
else:
base_url = kwargs.get("base_url")
-
- if "deepseek-r1" in kwargs.get("model_name", "qwen2.5:7b"):
- return DeepSeekR1ChatOllama(
- model=kwargs.get("model_name", "deepseek-r1:14b"),
- temperature=kwargs.get("temperature", 0.0),
- num_ctx=kwargs.get("num_ctx", 32000),
- base_url=base_url,
- )
- else:
- return ChatOllama(
- model=kwargs.get("model_name", "qwen2.5:7b"),
- temperature=kwargs.get("temperature", 0.0),
- num_ctx=kwargs.get("num_ctx", 32000),
- num_predict=kwargs.get("num_predict", 1024),
- base_url=base_url,
- )
+
+ model_name = kwargs.get("model_name", "qwen2.5:7b")
+
+ # Use the enhanced ChatOllama for all Ollama models
+ return EnhancedChatOllama(
+ model=model_name,
+ temperature=kwargs.get("temperature", 0.0),
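+        # Cap the context window; the 32k ceiling is an assumption to keep local models from exhausting memory
+        # (raise it if your hardware and model support a larger num_ctx)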
+ num_ctx=min(kwargs.get("num_ctx", 16000), 32000),
+ num_predict=kwargs.get("num_predict", 1024),
+ base_url=base_url,
+ stop=["<|im_end|>", ""]
+ )
elif provider == "azure_openai":
if not kwargs.get("base_url", ""):
base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "")
@@ -144,15 +170,14 @@ def get_llm_model(provider: str, **kwargs):
else:
base_url = kwargs.get("base_url")
- return ChatOpenAI(
+ return EnhancedChatOpenAI(
model=kwargs.get("model_name", "qwen-plus"),
temperature=kwargs.get("temperature", 0.0),
base_url=base_url,
api_key=api_key,
)
-
elif provider == "moonshot":
- return ChatOpenAI(
+ return EnhancedChatOpenAI(
model=kwargs.get("model_name", "moonshot-v1-32k-vision-preview"),
temperature=kwargs.get("temperature", 0.0),
base_url=os.getenv("MOONSHOT_ENDPOINT"),
@@ -160,21 +185,24 @@ def get_llm_model(provider: str, **kwargs):
)
else:
raise ValueError(f"Unsupported provider: {provider}")
-
+
# Predefined model names for common providers
model_names = {
"anthropic": ["claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20240620", "claude-3-opus-20240229"],
"openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo", "o3-mini"],
"deepseek": ["deepseek-chat", "deepseek-reasoner"],
"google": ["gemini-2.0-flash", "gemini-2.0-flash-thinking-exp", "gemini-1.5-flash-latest", "gemini-1.5-flash-8b-latest", "gemini-2.0-flash-thinking-exp-01-21", "gemini-2.0-pro-exp-02-05"],
- "ollama": ["qwen2.5:7b", "qwen2.5:14b", "qwen2.5:32b", "qwen2.5-coder:14b", "qwen2.5-coder:32b", "llama2:7b", "deepseek-r1:14b", "deepseek-r1:32b"],
"azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"],
"mistral": ["pixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"],
"alibaba": ["qwen-plus", "qwen-max", "qwen-turbo", "qwen-long"],
"moonshot": ["moonshot-v1-32k-vision-preview", "moonshot-v1-8k-vision-preview"],
+ # Ollama models will be populated dynamically
+ "ollama": []
}
-# Callback to update the model name dropdown based on the selected provider
+# Update model_names with current Ollama models
+model_names = update_model_names()
+
def update_model_dropdown(llm_provider, api_key=None, base_url=None):
"""
Update the model name dropdown with predefined models for the selected provider.
@@ -185,11 +213,37 @@ def update_model_dropdown(llm_provider, api_key=None, base_url=None):
if not base_url:
base_url = os.getenv(f"{llm_provider.upper()}_BASE_URL", "")
- # Use predefined models for the selected provider
+ if llm_provider == "ollama":
+ # Refresh Ollama models list when provider is selected
+ current_models = get_ollama_models()
+ if current_models:
+ return gr.Dropdown(
+ choices=current_models,
+ value=current_models[0] if current_models else "",
+ interactive=True
+ )
+ else:
+ return gr.Dropdown(
+ choices=[],
+ value="",
+ interactive=True,
+ allow_custom_value=True
+ )
+
+ # Use predefined models for other providers
if llm_provider in model_names:
- return gr.Dropdown(choices=model_names[llm_provider], value=model_names[llm_provider][0], interactive=True)
+ return gr.Dropdown(
+ choices=model_names[llm_provider],
+ value=model_names[llm_provider][0],
+ interactive=True
+ )
else:
- return gr.Dropdown(choices=[], value="", interactive=True, allow_custom_value=True)
+ return gr.Dropdown(
+ choices=[],
+ value="",
+ interactive=True,
+ allow_custom_value=True
+ )
def handle_api_key_error(provider: str, env_var: str):
"""
@@ -208,7 +262,6 @@ def encode_image(img_path):
image_data = base64.b64encode(fin.read()).decode("utf-8")
return image_data
-
def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Dict[str, Optional[str]]:
"""Get the latest recording and trace files"""
latest_files: Dict[str, Optional[str]] = {ext: None for ext in file_types}
@@ -219,16 +272,25 @@ def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Di
for file_type in file_types:
try:
- matches = list(Path(directory).rglob(f"*{file_type}"))
- if matches:
- latest = max(matches, key=lambda p: p.stat().st_mtime)
+ # Use os.walk instead of Path.rglob
+ matching_files = []
+ for root, _, files in os.walk(directory):
+ for filename in files:
+ if filename.endswith(file_type):
+ full_path = os.path.join(root, filename)
+ matching_files.append(full_path)
+
+ if matching_files:
+ # Find latest file by modification time
+ latest = max(matching_files, key=lambda p: os.path.getmtime(p))
# Only return files that are complete (not being written)
- if time.time() - latest.stat().st_mtime > 1.0:
- latest_files[file_type] = str(latest)
+ if time.time() - os.path.getmtime(latest) > 1.0:
+ latest_files[file_type] = latest
except Exception as e:
print(f"Error getting latest {file_type} file: {e}")
return latest_files
+
async def capture_screenshot(browser_context):
"""Capture and encode a screenshot"""
# Extract the Playwright browser instance
diff --git a/webui.py b/webui.py
index e770d99d..6929c449 100644
--- a/webui.py
+++ b/webui.py
@@ -752,8 +752,8 @@ def create_ui(config, theme_name="Ocean"):
interactive=True,
allow_custom_value=True, # Allow users to input custom model names
choices=["auto", "json_schema", "function_calling"],
- info="Tool Calls Funtion Name",
- visible=False
+                            info="Tool Calling Method for the LLM",
+ visible=True
)
with gr.TabItem("🔧 LLM Configuration", id=2):