diff --git a/extension/package.json b/extension/package.json
index dacaad18..b5bde1bf 100644
--- a/extension/package.json
+++ b/extension/package.json
@@ -16,6 +16,7 @@
   },
   "dependencies": {
     "@radix-ui/react-slot": "^1.2.0",
+    "@rrweb/types": "2.0.0-alpha.18",
     "@tailwindcss/vite": "^4.1.5",
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
@@ -34,6 +35,7 @@
     "@wxt-dev/module-react": "^1.1.3",
     "tw-animate-css": "^1.2.9",
     "typescript": "^5.8.3",
+    "vite": "^6.3.5",
     "wxt": "^0.20.6"
   }
 }
diff --git a/workflows/backend/service.py b/workflows/backend/service.py
index 7338fd09..8af51ab9 100644
--- a/workflows/backend/service.py
+++ b/workflows/backend/service.py
@@ -6,8 +6,8 @@
 
 import aiofiles
 from browser_use.browser.browser import Browser
-from langchain_openai import ChatOpenAI
 
+from workflow_use.config import create_llm_pair
 from workflow_use.controller.service import WorkflowController
 from workflow_use.workflow.service import Workflow
 
@@ -33,10 +33,11 @@ def __init__(self) -> None:
 
         # LLM / workflow executor
         try:
-            self.llm_instance = ChatOpenAI(model='gpt-4.1-mini')
+            self.llm_instance, self.page_extraction_llm = create_llm_pair()
         except Exception as exc:
-            print(f'Error initializing LLM: {exc}. Ensure OPENAI_API_KEY is set.')
+            print(f'Error initializing LLM: {exc}. Check your .env configuration.')
             self.llm_instance = None
+            self.page_extraction_llm = None
 
         self.browser_instance = Browser()
         self.controller_instance = WorkflowController()
@@ -154,7 +155,7 @@ async def run_workflow_in_background(
         workflow_path = self.tmp_dir / workflow_name
         try:
             self.workflow_obj = Workflow.load_from_file(
-                str(workflow_path), llm=self.llm_instance, browser=self.browser_instance, controller=self.controller_instance
+                str(workflow_path), llm=self.llm_instance, browser=self.browser_instance, controller=self.controller_instance, page_extraction_llm=self.page_extraction_llm
             )
         except Exception as e:
             print(f'Error loading workflow: {e}')
diff --git a/workflows/cli.py b/workflows/cli.py
index 606610a0..123f0436 100644
--- a/workflows/cli.py
+++ b/workflows/cli.py
@@ -9,11 +9,10 @@
 import typer
 from browser_use import Browser
 
-# Assuming OPENAI_API_KEY is set in the environment
-from langchain_openai import ChatOpenAI
 from patchright.async_api import async_playwright as patchright_async_playwright
 
 from workflow_use.builder.service import BuilderService
+from workflow_use.config import create_llm_pair, load_env_config
 from workflow_use.controller.service import WorkflowController
 from workflow_use.mcp.service import get_mcp_server
 from workflow_use.recorder.service import RecordingService  # Added import
@@ -31,16 +30,15 @@
 
 # Default LLM instance to None
 llm_instance = None
+page_extraction_llm = None
+
 try:
-    llm_instance = ChatOpenAI(model='gpt-4o')
-    page_extraction_llm = ChatOpenAI(model='gpt-4o-mini')
+    # Load environment configuration and create LLM pair
+    load_env_config()
+    llm_instance, page_extraction_llm = create_llm_pair()
 except Exception as e:
-    typer.secho(f'Error initializing LLM: {e}. Would you like to set your OPENAI_API_KEY?', fg=typer.colors.RED)
-    set_openai_api_key = input('Set OPENAI_API_KEY? (y/n): ')
-    if set_openai_api_key.lower() == 'y':
-        os.environ['OPENAI_API_KEY'] = input('Enter your OPENAI_API_KEY: ')
-        llm_instance = ChatOpenAI(model='gpt-4o')
-        page_extraction_llm = ChatOpenAI(model='gpt-4o-mini')
+    typer.secho(f'Error initializing LLM: {e}. Please check your .env configuration.', fg=typer.colors.RED)
+    typer.secho('Make sure you have the correct API keys and provider settings in workflows/.env', fg=typer.colors.YELLOW)
 
 builder_service = BuilderService(llm=llm_instance) if llm_instance else None
 # recorder_service = RecorderService()  # Placeholder
@@ -426,8 +424,11 @@ def mcp_server_command(
     typer.echo(typer.style('Starting MCP server...', bold=True))
     typer.echo()  # Add space
 
-    llm_instance = ChatOpenAI(model='gpt-4o')
-    page_extraction_llm = ChatOpenAI(model='gpt-4o-mini')
+    try:
+        llm_instance, page_extraction_llm = create_llm_pair()
+    except Exception as e:
+        typer.secho(f'Error initializing LLM for MCP server: {e}', fg=typer.colors.RED)
+        raise typer.Exit(code=1)
 
     mcp = get_mcp_server(llm_instance, page_extraction_llm=page_extraction_llm, workflow_dir='./tmp')
 
diff --git a/workflows/workflow_use/config.py b/workflows/workflow_use/config.py
new file mode 100644
index 00000000..09386193
--- /dev/null
+++ b/workflows/workflow_use/config.py
@@ -0,0 +1,151 @@
+"""
+LLM configuration utility for workflow-use.
+Handles loading environment variables and creating LLM instances based on provider configuration.
+"""
+
+import os
+from typing import Optional, Tuple
+
+from dotenv import load_dotenv
+from langchain_core.language_models.chat_models import BaseChatModel
+
+
+def load_env_config() -> None:
+    """Load environment variables from the workflows/.env file."""
+    env_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), '.env')
+    load_dotenv(env_path)
+
+
+def get_llm_provider() -> str:
+    """Get the configured LLM provider (defaults to 'openai')."""
+    return os.getenv('LLM_PROVIDER', 'openai').lower()
+
+
+def create_openai_llm(model_name: Optional[str] = None, **kwargs) -> BaseChatModel:
+    """Create an OpenAI LLM instance with configuration from the environment."""
+    from langchain_openai import ChatOpenAI
+
+    model = model_name or os.getenv('OPENAI_MODEL', 'gpt-4o')
+    temperature = float(os.getenv('OPENAI_TEMPERATURE', '0.1'))
+    max_tokens = int(os.getenv('OPENAI_MAX_TOKENS', '4096'))
+    api_key = os.getenv('OPENAI_API_KEY')
+    base_url = os.getenv('OPENAI_BASE_URL')
+
+    if not api_key:
+        raise ValueError('OPENAI_API_KEY environment variable not set')
+
+    llm_kwargs = {
+        'model': model,
+        'temperature': temperature,
+        'max_tokens': max_tokens,
+        'api_key': api_key,
+        **kwargs
+    }
+
+    if base_url:
+        llm_kwargs['base_url'] = base_url
+
+    return ChatOpenAI(**llm_kwargs)
+
+
+def create_anthropic_llm(model_name: Optional[str] = None, **kwargs) -> BaseChatModel:
+    """Create an Anthropic LLM instance with configuration from the environment."""
+    from langchain_anthropic import ChatAnthropic
+
+    model = model_name or os.getenv('ANTHROPIC_MODEL', 'claude-3-5-sonnet-20241022')
+    temperature = float(os.getenv('ANTHROPIC_TEMPERATURE', '0.1'))
+    max_tokens = int(os.getenv('ANTHROPIC_MAX_TOKENS', '4096'))
+    api_key = os.getenv('ANTHROPIC_API_KEY')
+
+    if not api_key:
+        raise ValueError('ANTHROPIC_API_KEY environment variable not set')
+
+    return ChatAnthropic(
+        model=model,
+        temperature=temperature,
+        max_tokens=max_tokens,
+        api_key=api_key,
+        **kwargs
+    )
+
+
+def create_google_llm(model_name: Optional[str] = None, **kwargs) -> BaseChatModel:
+    """Create a Google Gemini LLM instance with configuration from the environment."""
+    from langchain_google_genai import ChatGoogleGenerativeAI
+
+    model = model_name or os.getenv('GOOGLE_MODEL', 'gemini-1.5-pro')
+    temperature = float(os.getenv('GOOGLE_TEMPERATURE', '0.1'))
+    max_tokens = int(os.getenv('GOOGLE_MAX_TOKENS', '4096'))
+    api_key = os.getenv('GOOGLE_API_KEY')
+
+    if not api_key:
+        raise ValueError('GOOGLE_API_KEY environment variable not set')
+
+    return ChatGoogleGenerativeAI(
+        model=model,
+        temperature=temperature,
+        max_output_tokens=max_tokens,
+        google_api_key=api_key,
+        **kwargs
+    )
+
+
+def create_llm(model_name: Optional[str] = None, provider: Optional[str] = None, **kwargs) -> BaseChatModel:
+    """
+    Create an LLM instance based on the configured provider.
+
+    Args:
+        model_name: Override the default model name.
+        provider: Override the default provider.
+        **kwargs: Additional arguments passed to the LLM constructor.
+
+    Returns:
+        A BaseChatModel instance.
+
+    Raises:
+        ValueError: If the provider is not supported or the API key is missing.
+    """
+    provider = (provider or get_llm_provider()).lower()
+
+    if provider == 'openai':
+        return create_openai_llm(model_name, **kwargs)
+    elif provider == 'anthropic':
+        return create_anthropic_llm(model_name, **kwargs)
+    elif provider == 'google':
+        return create_google_llm(model_name, **kwargs)
+    else:
+        raise ValueError(f'Unsupported LLM provider: {provider}')
+
+
+def create_llm_pair() -> Tuple[BaseChatModel, BaseChatModel]:
+    """
+    Create a pair of LLM instances: the main LLM and the page extraction LLM.
+
+    Returns:
+        Tuple of (main_llm, page_extraction_llm).
+    """
+    provider = get_llm_provider()
+
+    if provider == 'openai':
+        main_model = os.getenv('OPENAI_MODEL', 'gpt-4o')
+        page_model = os.getenv('OPENAI_PAGE_EXTRACTION_MODEL', 'gpt-4o-mini')
+        main_llm = create_openai_llm(main_model)
+        page_llm = create_openai_llm(page_model)
+    elif provider == 'anthropic':
+        main_model = os.getenv('ANTHROPIC_MODEL', 'claude-3-5-sonnet-20241022')
+        page_model = os.getenv('ANTHROPIC_PAGE_EXTRACTION_MODEL', 'claude-3-haiku-20240307')
+        main_llm = create_anthropic_llm(main_model)
+        page_llm = create_anthropic_llm(page_model)
+    elif provider == 'google':
+        main_model = os.getenv('GOOGLE_MODEL', 'gemini-1.5-pro')
+        page_model = os.getenv('GOOGLE_PAGE_EXTRACTION_MODEL', 'gemini-1.5-flash')
+        main_llm = create_google_llm(main_model)
+        page_llm = create_google_llm(page_model)
+    else:
+        raise ValueError(f'Unsupported LLM provider: {provider}')
+
+    return main_llm, page_llm
+
+
+# Initialize environment configuration when the module is imported
+load_env_config()
\ No newline at end of file
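
Everything above reads its settings from workflows/.env. A minimal sketch of that file, using the key names from the os.getenv() calls in config.py; the provider choice and the placeholder values below are illustrative, not defaults shipped with this change:

    # workflows/.env -- set LLM_PROVIDER, then the keys for that provider only
    LLM_PROVIDER=anthropic
    ANTHROPIC_API_KEY=your-anthropic-key
    ANTHROPIC_MODEL=claude-3-5-sonnet-20241022
    ANTHROPIC_PAGE_EXTRACTION_MODEL=claude-3-haiku-20240307
    ANTHROPIC_TEMPERATURE=0.1
    ANTHROPIC_MAX_TOKENS=4096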
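
And a short usage sketch of the new module, calling only functions this diff introduces (it assumes a .env like the one above is in place and the matching langchain provider package is installed):

    from workflow_use.config import create_llm_pair, load_env_config

    load_env_config()  # reads workflows/.env (also runs once at module import)
    main_llm, page_llm = create_llm_pair()  # provider selected via LLM_PROVIDER
    reply = main_llm.invoke('Reply with one word: hello')  # standard BaseChatModel API
    print(type(main_llm).__name__, reply.content)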