Add tools system: web search and image generation (v1.1.3)

pippinlovesdot · pippinlovesdot · commit 63bf8212ba14 · 2025-12-12T02:59:25.000+08:00
- Add web_search tool using OpenRouter native plugins API
- Add image_generation tool (standalone function for agent architecture)
- Register both tools in registry with OpenAI-style schemas
- Update ARCHITECTURE.md with tools documentation
- Update README.md with new tools in project structure
diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
@@ -44,6 +44,8 @@ services/
 tools/
   __init__.py
   registry.py               # Tool registry for function calling
+  web_search.py             # Web search via OpenRouter plugins
+  image_generation.py       # Image generation tool
 assets/                     # Reference images for generation
 .env.example                # Environment template
 requirements.txt            # Python dependencies
@@ -185,11 +187,43 @@ Tool registry for LLM function calling. Contains:
 - `TOOLS` — dict mapping tool names to async functions
 - `TOOLS_SCHEMA` — list of JSON schemas in OpenAI function calling format
 
-Currently empty, ready for extension. To add a tool:
+**Available tools:**
+- `web_search` — real-time web search via OpenRouter plugins
+- `generate_image` — image generation using Gemini 3 Pro
+
+To add a new tool:
 1. Create tool function in `tools/` directory
 2. Import and add to `TOOLS` dict
 3. Add JSON schema to `TOOLS_SCHEMA` list
 
+### tools/web_search.py
+Web search using OpenRouter's native web search plugin.
+
+**How it works:**
+- Uses `plugins: [{id: "web"}]` parameter in OpenRouter API
+- Returns search results with source citations (URLs, titles, snippets)
+- Configurable `max_results` (1-10, default 5)
+
+**Function signature:**
+```python
+async def web_search(query: str, max_results: int = 5) -> dict[str, Any]:
+    # Returns {"content": "...", "sources": [...]}
+```
+
+### tools/image_generation.py
+Image generation using Gemini 3 Pro via OpenRouter.
+
+**How it works:**
+- Loads reference images from `assets/` folder (up to 2 randomly selected)
+- Sends reference images + prompt to model for consistent character appearance
+- Returns raw image bytes (PNG format)
+
+**Function signature:**
+```python
+async def generate_image(prompt: str) -> bytes:
+    # Returns image bytes
+```
+
 ---
 
 ## Database Schema
diff --git a/README.md b/README.md
@@ -176,9 +176,9 @@ Visual content generation supports two providers:
 
 ### Web Search
 
-Real-time web search capability powered by **Perplexity** via OpenRouter:
+Real-time web search capability powered by **OpenRouter's native plugins**:
 
-- **Perplexity Sonar** — Online search model for current information, news, and facts. Automatically invoked by the LLM when it needs fresh data. Integrated through OpenRouter for unified API access.
+- **OpenRouter Web Plugin** — Native web search using `plugins: [{id: "web"}]` API. Returns real search results with source citations (URLs, titles, snippets). Supports multiple search engines including native provider search and Exa.ai.
 
 ### Twitter Integration
 
@@ -224,7 +224,8 @@ my-agent/
 │
 ├── tools/
 │   ├── registry.py          # Available tools for LLM
-│   └── web_search.py        # Web search capability
+│   ├── web_search.py        # Web search via OpenRouter plugins
+│   └── image_generation.py  # Image generation tool
 │
 ├── main.py                  # FastAPI + APScheduler entry point
 ├── requirements.txt         # Dependencies
diff --git a/tools/image_generation.py b/tools/image_generation.py
@@ -0,0 +1,172 @@
+"""
+Image generation tool using OpenRouter API.
+
+Generates images based on text prompts and reference images from assets folder.
+Uses google/gemini-3-pro-image-preview model via OpenRouter.
+"""
+
+import base64
+import logging
+import random
+from pathlib import Path
+
+import httpx
+
+from config.settings import settings
+
+logger = logging.getLogger(__name__)
+
+OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
+IMAGE_MODEL = "google/gemini-3-pro-image-preview"
+
+# Path to reference images folder
+ASSETS_PATH = Path(__file__).parent.parent / "assets"
+
+# System prompt for image generation
+IMAGE_SYSTEM_PROMPT = """You are an image generation assistant. Your task is to generate images based on reference images provided and user instructions. Always output an image."""
+
+
+def _get_reference_images() -> list[str]:
+    """
+    Load every supported image in the assets folder as a base64 data URI.
+
+    Files are matched by extension (case-insensitive). A file that cannot
+    be read is logged and skipped so the remaining assets are still used.
+
+    Returns:
+        List of "data:<mime>;base64,<payload>" strings, one per image read
+        successfully; empty if the folder is missing or has no images.
+    """
+    if not ASSETS_PATH.is_dir():
+        logger.warning(f"Assets folder not found: {ASSETS_PATH}")
+        return []
+
+    # Single source of truth for supported extensions and their MIME types,
+    # built once instead of once per file.
+    mime_types = {
+        ".png": "image/png",
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".gif": "image/gif",
+        ".webp": "image/webp",
+    }
+
+    images = []
+    for file_path in ASSETS_PATH.iterdir():
+        mime_type = mime_types.get(file_path.suffix.lower())
+        if mime_type is None:
+            continue
+        try:
+            image_data = file_path.read_bytes()
+        except OSError as e:
+            logger.error(f"Error loading image {file_path}: {e}")
+            continue
+
+        base64_data = base64.b64encode(image_data).decode()
+        images.append(f"data:{mime_type};base64,{base64_data}")
+        logger.debug(f"Loaded reference image: {file_path.name}")
+
+    logger.info(f"Loaded {len(images)} reference images from assets")
+    return images
+
+
+def _select_reference_images(count: int = 2) -> list[str]:
+    """
+    Pick reference images to send along with a generation request.
+
+    Args:
+        count: Maximum number of images to return.
+
+    Returns:
+        Every available image (as a data URI) when there are no more than
+        ``count`` of them, otherwise ``count`` images chosen at random.
+    """
+    candidates = _get_reference_images()
+
+    # Nothing to choose from, or no surplus: hand the pool back unchanged.
+    if not candidates or len(candidates) <= count:
+        return candidates
+
+    # More images than requested: take a random subset without repeats.
+    return random.sample(candidates, count)
+
+
+async def generate_image(prompt: str) -> bytes:
+    """
+    Generate an image from a text prompt using reference images.
+
+    This is the main tool function for image generation. Up to two reference
+    images from the assets folder are sent ahead of the prompt so the model
+    can keep the character's appearance consistent.
+
+    The request is sent to the OpenRouter chat completions endpoint
+    (``OPENROUTER_URL``) using ``IMAGE_MODEL``.
+
+    Args:
+        prompt: Text description of the image to generate.
+
+    Returns:
+        Raw image bytes decoded from the base64 data URI in the response.
+        The bytes are not re-encoded, so the format is whatever was returned.
+
+    Raises:
+        httpx.HTTPStatusError: If OpenRouter answers with a 4xx/5xx status.
+        httpx.RequestError: On a transport failure or the 120 s timeout.
+        ValueError: If the response has no choices, no images, or the first
+            image is not an inline base64 data URI.
+    """
+    logger.info(f"Generating image for prompt: {prompt[:100]}...")
+
+    headers = {
+        "Authorization": f"Bearer {settings.openrouter_api_key}",
+        "Content-Type": "application/json",
+        "HTTP-Referer": "https://pippinlovesdot.com",
+        "X-Title": "DOT Twitter Bot",
+    }
+
+    # Reference images go first, followed by the text instruction.
+    content = [
+        {"type": "image_url", "image_url": {"url": image_uri}}
+        for image_uri in _select_reference_images(2)
+    ]
+    content.append({"type": "text", "text": prompt})
+
+    # Build request payload
+    payload = {
+        "model": IMAGE_MODEL,
+        "messages": [
+            {"role": "system", "content": IMAGE_SYSTEM_PROMPT},
+            {"role": "user", "content": content},
+        ],
+    }
+
+    # httpx defaults to a 5 s timeout; allow up to 120 s for generation.
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        response = await client.post(
+            OPENROUTER_URL,
+            headers=headers,
+            json=payload,
+        )
+        response.raise_for_status()
+        data = response.json()
+
+    # `or` fallbacks cover keys that are present but empty or null; a plain
+    # .get() default would not, and indexing [0] on an empty "choices" list
+    # would raise IndexError instead of the ValueError below.
+    choices = data.get("choices") or [{}]
+    message = choices[0].get("message") or {}
+    images = message.get("images") or []
+
+    if images:
+        # Only the first image in message.images is used.
+        image_url = (images[0].get("image_url") or {}).get("url") or ""
+
+        # partition() leaves base64_data empty when there is no comma,
+        # so a malformed data URI falls through to the ValueError.
+        prefix, _, base64_data = image_url.partition(",")
+        if prefix.startswith("data:") and base64_data:
+            image_bytes = base64.b64decode(base64_data)
+            logger.info(f"Generated image: {len(image_bytes)} bytes")
+            return image_bytes
+
+    # Include the message keys so the caller can see what came back instead.
+    raise ValueError(f"No image data in response: {list(message.keys())}")
diff --git a/tools/registry.py b/tools/registry.py
@@ -5,27 +5,53 @@
 Add your custom tools here to extend the bot's capabilities.
 """
 
+from tools.web_search import web_search
+from tools.image_generation import generate_image
+
 # Registry of available tools (function references)
-# Example: TOOLS = {"web_search": web_search}
-TOOLS = {}
+TOOLS = {
+    "web_search": web_search,
+    "generate_image": generate_image
+}
 
 # JSON Schema definitions for tools (OpenAI function calling format)
-# Example schema:
-# {
-#     "type": "function",
-#     "function": {
-#         "name": "tool_name",
-#         "description": "What this tool does",
-#         "parameters": {
-#             "type": "object",
-#             "properties": {
-#                 "param_name": {
-#                     "type": "string",
-#                     "description": "Parameter description"
-#                 }
-#             },
-#             "required": ["param_name"]
-#         }
-#     }
-# }
-TOOLS_SCHEMA = []
+TOOLS_SCHEMA = [  # one schema per tool; each "name" matches a key in TOOLS
+    {
+        "type": "function",
+        "function": {
+            "name": "web_search",
+            "description": "Search the web for current information. Use this when you need to find recent news, events, prices, facts, or any information that might not be in your training data.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "The search query to look up"
+                    },
+                    "max_results": {  # optional: not listed in "required"
+                        "type": "integer",
+                        "description": "Maximum number of search results (1-10, default 5)"  # range is stated in text only; this schema does not enforce it
+                    }
+                },
+                "required": ["query"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "generate_image",
+            "description": "Generate an image based on a text description. Uses reference images from assets folder for consistent character appearance.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "prompt": {  # the only argument generate_image accepts
+                        "type": "string",
+                        "description": "Text description of the image to generate"
+                    }
+                },
+                "required": ["prompt"]
+            }
+        }
+    }
+]
diff --git a/tools/web_search.py b/tools/web_search.py