Commit 3c4187e

feat: enhance LLM and search engine capabilities (#12)
* feat: enhance LLM and search engine capabilities

  - Add Anthropic API support with Claude models
  - Implement environment variable configuration via .env.local
  - Add a retry mechanism and error handling for the search engine
  - Improve search engine reliability with:
    - Random User-Agent rotation
    - API/HTML backend fallback
    - Exponential backoff between retries
  - Add new dependencies: anthropic and python-dotenv

  Breaking changes:
  - The LLM API now requires provider-specific API keys in .env.local

* docs: add environment variable setup step in README

  - Add step 2 for environment configuration
  - Include instructions for copying .env.example
  - Renumber the subsequent installation step

* refactor: improve LLM API configuration and testing

  - Add default model settings per provider (OpenAI, Anthropic, Local)
  - Replace hardcoded API configuration with environment variables
  - Update the search engine backend from 'html' to 'api'
  - Enhance debug messages in the search engine tests
  - Update test cases to reflect the new default configurations

  Default models:
  - OpenAI: gpt-3.5-turbo
  - Anthropic: claude-3-sonnet-20240229
  - Local: Qwen/Qwen2.5-32B-Instruct-AWQ

* ci: support master branch in GitHub Actions
1 parent 27dee37 commit 3c4187e
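
The retry and fallback behavior listed above lands in tools/search_engine.py, which is among the 10 changed files but whose diff is not reproduced on this page (only its tests appear below). A minimal sketch of what that loop could look like, assuming duckduckgo_search's DDGS client; MAX_RETRIES, USER_AGENTS, and the exact debug wording are assumptions inferred from the updated tests:

```python
# Hypothetical sketch of the retry behavior described in the commit message;
# the actual tools/search_engine.py diff is not shown in this view.
import random
import sys
import time

from duckduckgo_search import DDGS

MAX_RETRIES = 3   # assumption: matches the "Attempt 1/3" debug text in the tests
USER_AGENTS = [   # assumption: any pool of common browser User-Agent strings
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
]

def search(query, max_results=10, backend="api"):
    for attempt in range(1, MAX_RETRIES + 1):
        print(f"DEBUG: Attempt {attempt}/{MAX_RETRIES} - Searching for query: {query}",
              file=sys.stderr)
        try:
            # Random User-Agent rotation
            headers = {"User-Agent": random.choice(USER_AGENTS)}
            with DDGS(headers=headers) as ddgs:
                results = list(ddgs.text(query, max_results=max_results, backend=backend))
            print(f"DEBUG: Found {len(results)} results", file=sys.stderr)
            return results
        except Exception as e:
            print(f"DEBUG: Attempt {attempt} failed: {e}", file=sys.stderr)
            # API/HTML backend fallback
            backend = "html" if backend == "api" else "api"
            if attempt < MAX_RETRIES:
                time.sleep(2 ** attempt)  # exponential backoff before the next try
    return []
```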

10 files changed: +258 -57 lines changed

.cursorrules

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ Note all the tools are in python. So in the case you need to do batch processing
 
 You always have an LLM at your side to help you with the task. For simple tasks, you could invoke the LLM by running the following command:
 ```
-py310/bin/python ./tools/llm_api.py --prompt "What is the capital of France?"
+py310/bin/python ./tools/llm_api.py --prompt "What is the capital of France?" --provider "anthropic"
 ```
 
 But usually it's a better idea to check the content of the file and use the APIs in the `tools/llm_api.py` file to invoke the LLM if needed.
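
For the API route recommended here, a call against the new `query_llm` signature (introduced in the `tools/llm_api.py` diff further down) could look like the following sketch; the prompt is a placeholder and a valid ANTHROPIC_API_KEY must already be in `.env.local`:

```python
# Illustrative only: mirrors the query_llm signature from the tools/llm_api.py
# diff below. Requires ANTHROPIC_API_KEY to be set in .env.local.
from tools.llm_api import query_llm

response = query_llm(
    "What is the capital of France?",  # placeholder prompt
    provider="anthropic",              # omit to fall back to the openai default
)
print(response)
```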

.env.example

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+OPENAI_API_KEY=
+ANTHROPIC_API_KEY=

.github/workflows/tests.yml

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+name: Unit Tests
+
+on:
+  pull_request:
+    branches: [ master, main ]
+  push:
+    branches: [ master, main ]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.10'
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements.txt
+        python -m playwright install chromium
+
+    - name: Copy environment file
+      run: |
+        cp .env.example .env
+
+    - name: Run tests
+      env:
+        ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      run: |
+        PYTHONPATH=. python -m unittest discover tests/

.gitignore

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
+
+# dependencies
+/node_modules
+/.pnp
+.pnp.js
+
+# testing
+/coverage
+
+# next.js
+/.next/
+/out/
+
+# production
+/build
+
+# misc
+.DS_Store
+*.pem
+
+# debug
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+.pnpm-debug.log*
+
+# local env files
+.env*.local
+
+# vercel
+.vercel
+
+# typescript
+*.tsbuildinfo
+next-env.d.ts
+
+credentials.json
+
+# asdf
+.tool-versions
+
+# pycache
+**/__pycache__/
+
+# python virtual environment
+/py310/

README.md

Lines changed: 9 additions & 1 deletion
@@ -26,7 +26,15 @@ source py310/bin/activate
 .\py310\Scripts\activate
 ```
 
-2. Install dependencies:
+2. Configure environment variables:
+```bash
+# Copy the example environment file
+cp .env.example .env
+
+# Edit .env with your API keys and configurations
+```
+
+3. Install dependencies:
 ```bash
 # Install required packages
 pip install -r requirements.txt

requirements.txt

Lines changed: 2 additions & 0 deletions
@@ -7,6 +7,8 @@ duckduckgo-search>=4.1.1
 
 # LLM integration
 openai>=1.12.0
+anthropic>=0.42.0
+python-dotenv>=1.0.0
 
 # Testing
 unittest2>=1.1.0

tests/test_llm_api.py

Lines changed: 6 additions & 7 deletions
@@ -35,14 +35,13 @@ def setUp(self):
     @unittest.skipIf(skip_llm_tests, skip_message)
     @patch('tools.llm_api.OpenAI')
     def test_create_llm_client(self, mock_openai):
-        # Test client creation
+        # Test client creation with default provider (openai)
         mock_openai.return_value = self.mock_client
-        client = create_llm_client()
+        client = create_llm_client()  # use the default provider
 
         # Verify OpenAI was called with correct parameters
         mock_openai.assert_called_once_with(
-            base_url="http://192.168.180.137:8006/v1",
-            api_key="not-needed"
+            api_key=os.getenv('OPENAI_API_KEY')  # use the API key from the environment
         )
 
         self.assertEqual(client, self.mock_client)
@@ -53,15 +52,15 @@ def test_query_llm_success(self, mock_create_client):
         # Set up mock
         mock_create_client.return_value = self.mock_client
 
-        # Test query with default parameters
-        response = query_llm("Test prompt")
+        # Test query with default provider
+        response = query_llm("Test prompt")  # use the default provider
 
         # Verify response
         self.assertEqual(response, "Test response")
 
         # Verify client was called correctly
         self.mock_client.chat.completions.create.assert_called_once_with(
-            model="Qwen/Qwen2.5-32B-Instruct-AWQ",
+            model="gpt-3.5-turbo",  # OpenAI's default model
             messages=[{"role": "user", "content": "Test prompt"}],
             temperature=0.7
        )
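
Since `create_llm_client` now reads keys from the environment, a test can pin a deterministic key with `unittest.mock.patch.dict` instead of relying on whatever `.env.local` happens to contain. A minimal sketch (this test and its key value are hypothetical, not part of the commit):

```python
import os
import unittest
from unittest.mock import patch

from tools.llm_api import create_llm_client

class TestEnvKey(unittest.TestCase):  # hypothetical test, not in this commit
    @patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"})  # placeholder key
    @patch("tools.llm_api.OpenAI")
    def test_uses_env_key(self, mock_openai):
        # create_llm_client should pass the pinned env key straight through
        create_llm_client("openai")
        mock_openai.assert_called_once_with(api_key="test-key")

if __name__ == "__main__":
    unittest.main()
```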

tests/test_search_engine.py

Lines changed: 3 additions & 2 deletions
@@ -44,7 +44,8 @@ def test_successful_search(self, mock_ddgs):
         search("test query", max_results=2)
 
         # Check debug output
-        self.assertIn("DEBUG: Searching for query: test query", self.stderr.getvalue())
+        expected_debug = "DEBUG: Attempt 1/3 - Searching for query: test query"
+        self.assertIn(expected_debug, self.stderr.getvalue())
         self.assertIn("DEBUG: Found 2 results", self.stderr.getvalue())
 
         # Check search results output
@@ -62,7 +63,7 @@ def test_successful_search(self, mock_ddgs):
         mock_ddgs_instance.__enter__.return_value.text.assert_called_once_with(
             "test query",
             max_results=2,
-            backend='html'
+            backend='api'
         )
 
     @patch('tools.search_engine.DDGS')

tools/llm_api.py

Lines changed: 72 additions & 22 deletions
@@ -1,43 +1,93 @@
 #!/usr/bin/env /workspace/tmp_windsurf/py310/bin/python3
 
 from openai import OpenAI
+from anthropic import Anthropic
 import argparse
+import os
+from dotenv import load_dotenv
+from pathlib import Path
 
-def create_llm_client():
-    client = OpenAI(
-        base_url="http://192.168.180.137:8006/v1",
-        api_key="not-needed"  # API key might not be needed for local deployment
-    )
-    return client
+# Load the .env.local file
+env_path = Path('.') / '.env.local'
+load_dotenv(dotenv_path=env_path)
 
-def query_llm(prompt, client=None, model="Qwen/Qwen2.5-32B-Instruct-AWQ"):
+def create_llm_client(provider="openai"):
+    if provider == "openai":
+        api_key = os.getenv('OPENAI_API_KEY')
+        if not api_key:
+            raise ValueError("OPENAI_API_KEY not found in environment variables")
+        return OpenAI(
+            api_key=api_key
+        )
+    elif provider == "anthropic":
+        api_key = os.getenv('ANTHROPIC_API_KEY')
+        if not api_key:
+            raise ValueError("ANTHROPIC_API_KEY not found in environment variables")
+        return Anthropic(
+            api_key=api_key
+        )
+    elif provider == "local":
+        return OpenAI(
+            base_url="http://192.168.180.137:8006/v1",
+            api_key="not-needed"  # API key may not be needed for local deployment
+        )
+    else:
+        raise ValueError(f"Unsupported provider: {provider}")
+
+def query_llm(prompt, client=None, model=None, provider="openai"):
     if client is None:
-        client = create_llm_client()
+        client = create_llm_client(provider)
 
     try:
-        response = client.chat.completions.create(
-            model=model,
-            messages=[
-                {"role": "user", "content": prompt}
-            ],
-            temperature=0.7,
-        )
-        return response.choices[0].message.content
+        # Set the default model
+        if model is None:
+            if provider == "openai":
+                model = "gpt-3.5-turbo"
+            elif provider == "anthropic":
+                model = "claude-3-sonnet-20240229"
+            elif provider == "local":
+                model = "Qwen/Qwen2.5-32B-Instruct-AWQ"
+
+        if provider == "openai" or provider == "local":
+            response = client.chat.completions.create(
+                model=model,
+                messages=[
+                    {"role": "user", "content": prompt}
+                ],
+                temperature=0.7,
+            )
+            return response.choices[0].message.content
+        elif provider == "anthropic":
+            response = client.messages.create(
+                model=model,
+                max_tokens=1000,
+                messages=[
+                    {"role": "user", "content": prompt}
+                ]
+            )
+            return response.content[0].text
     except Exception as e:
         print(f"Error querying LLM: {e}")
-        print("Note: If you haven't configured a local LLM server, this error is expected and can be ignored.")
-        print("The LLM functionality is optional and won't affect other features.")
         return None
 
 def main():
     parser = argparse.ArgumentParser(description='Query an LLM with a prompt')
     parser.add_argument('--prompt', type=str, help='The prompt to send to the LLM', required=True)
-    parser.add_argument('--model', type=str, default="Qwen/Qwen2.5-32B-Instruct-AWQ",
-                        help='The model to use (default: Qwen/Qwen2.5-32B-Instruct-AWQ)')
+    parser.add_argument('--provider', type=str, choices=['openai', 'anthropic'],
+                        default="openai", help='The API provider to use')
+    parser.add_argument('--model', type=str,
+                        help='The model to use (default depends on provider)')
     args = parser.parse_args()
 
-    client = create_llm_client()
-    response = query_llm(args.prompt, client, model=args.model)
+    # Set the default model
+    if not args.model:
+        if args.provider == "openai":
+            args.model = "gpt-3.5-turbo"
+        else:
+            args.model = "claude-3-5-sonnet-20241022"
+
+    client = create_llm_client(args.provider)
+    response = query_llm(args.prompt, client, model=args.model, provider=args.provider)
    if response:
        print(response)
    else: