Skip to content

Commit 63bf821

Browse files
Add tools system: web search and image generation (v1.1.3)
- Add web_search tool using OpenRouter native plugins API
- Add image_generation tool (standalone function for agent architecture)
- Register both tools in registry with OpenAI-style schemas
- Update ARCHITECTURE.md with tools documentation
- Update README.md with new tools in project structure
1 parent 266547b commit 63bf821

File tree

5 files changed

+340
-25
lines changed

5 files changed

+340
-25
lines changed

ARCHITECTURE.md

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ services/
4444
tools/
4545
__init__.py
4646
registry.py # Tool registry for function calling
47+
web_search.py # Web search via OpenRouter plugins
48+
image_generation.py # Image generation tool
4749
assets/ # Reference images for generation
4850
.env.example # Environment template
4951
requirements.txt # Python dependencies
@@ -185,11 +187,43 @@ Tool registry for LLM function calling. Contains:
185187
- `TOOLS` — dict mapping tool names to async functions
186188
- `TOOLS_SCHEMA` — list of JSON schemas in OpenAI function calling format
187189

188-
Currently empty, ready for extension. To add a tool:
190+
**Available tools:**
191+
- `web_search` — real-time web search via OpenRouter plugins
192+
- `generate_image` — image generation using Gemini 3 Pro
193+
194+
To add a new tool:
189195
1. Create tool function in `tools/` directory
190196
2. Import and add to `TOOLS` dict
191197
3. Add JSON schema to `TOOLS_SCHEMA` list
192198

199+
### tools/web_search.py
200+
Web search using OpenRouter's native web search plugin.
201+
202+
**How it works:**
203+
- Uses `plugins: [{id: "web"}]` parameter in OpenRouter API
204+
- Returns search results with source citations (URLs, titles, snippets)
205+
- Configurable `max_results` (1-10, default 5)
206+
207+
**Function signature:**
208+
```python
209+
async def web_search(query: str, max_results: int = 5) -> dict[str, Any]:
210+
# Returns {"content": "...", "sources": [...]}
211+
```
212+
213+
### tools/image_generation.py
214+
Image generation using Gemini 3 Pro via OpenRouter.
215+
216+
**How it works:**
217+
- Loads reference images from `assets/` folder (up to 2 randomly selected)
218+
- Sends reference images + prompt to model for consistent character appearance
219+
- Returns raw image bytes (PNG format)
220+
221+
**Function signature:**
222+
```python
223+
async def generate_image(prompt: str) -> bytes:
224+
# Returns image bytes
225+
```
226+
193227
---
194228

195229
## Database Schema

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,9 @@ Visual content generation supports two providers:
176176

177177
### Web Search
178178

179-
Real-time web search capability powered by **Perplexity** via OpenRouter:
179+
Real-time web search capability powered by **OpenRouter's native plugins**:
180180

181-
- **Perplexity Sonar** — Online search model for current information, news, and facts. Automatically invoked by the LLM when it needs fresh data. Integrated through OpenRouter for unified API access.
181+
- **OpenRouter Web Plugin** — Native web search using the `plugins: [{id: "web"}]` API parameter. Returns real search results with source citations (URLs, titles, snippets). Supports multiple search engines, including native provider search and Exa.ai.
182182

183183
### Twitter Integration
184184

@@ -224,7 +224,8 @@ my-agent/
224224
225225
├── tools/
226226
│ ├── registry.py # Available tools for LLM
227-
│ └── web_search.py # Web search capability
227+
│ ├── web_search.py # Web search via OpenRouter plugins
228+
│ └── image_generation.py # Image generation tool
228229
229230
├── main.py # FastAPI + APScheduler entry point
230231
├── requirements.txt # Dependencies

tools/image_generation.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
"""
2+
Image generation tool using OpenRouter API.
3+
4+
Generates images based on text prompts and reference images from assets folder.
5+
Uses google/gemini-3-pro-image-preview model via OpenRouter.
6+
"""
7+
8+
import base64
9+
import logging
10+
import random
11+
from pathlib import Path
12+
13+
import httpx
14+
15+
from config.settings import settings
16+
17+
logger = logging.getLogger(__name__)
18+
19+
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
20+
IMAGE_MODEL = "google/gemini-3-pro-image-preview"
21+
22+
# Path to reference images folder
23+
ASSETS_PATH = Path(__file__).parent.parent / "assets"
24+
25+
# System prompt for image generation
26+
IMAGE_SYSTEM_PROMPT = """You are an image generation assistant. Your task is to generate images based on reference images provided and user instructions. Always output an image."""
27+
28+
29+
def _get_reference_images() -> list[str]:
    """
    Load every supported image in the assets folder as a base64 data URI.

    Only the top level of ``ASSETS_PATH`` is scanned (no recursion). Entries
    are visited in sorted order so the result is stable between calls.
    Files that cannot be read are logged and skipped rather than aborting
    the whole load.

    Returns:
        List of ``data:<mime>;base64,<payload>`` strings. Empty when the
        assets folder is missing or contains no readable supported image.
    """
    # is_dir() rather than exists(): a regular file at this path would pass
    # exists() and then make iterdir() raise.
    if not ASSETS_PATH.is_dir():
        logger.warning(f"Assets folder not found: {ASSETS_PATH}")
        return []

    # Single source of truth for both "is this extension supported?" and
    # "which MIME type does it map to?" -- built once, not once per file.
    mime_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }

    images = []

    for file_path in sorted(ASSETS_PATH.iterdir()):
        mime_type = mime_types.get(file_path.suffix.lower())

        # Skip unsupported extensions and directories that merely look like
        # images (e.g. a folder named "old.png").
        if mime_type is None or not file_path.is_file():
            continue

        try:
            image_data = file_path.read_bytes()
        except OSError as e:
            logger.error(f"Error loading image {file_path}: {e}")
            continue

        # Base64 output is pure ASCII, so decoding cannot fail.
        base64_data = base64.b64encode(image_data).decode("ascii")
        images.append(f"data:{mime_type};base64,{base64_data}")

        logger.debug(f"Loaded reference image: {file_path.name}")

    logger.info(f"Loaded {len(images)} reference images from assets")
    return images
71+
72+
73+
def _select_reference_images(count: int = 2) -> list[str]:
    """
    Pick the reference images to send along with a generation request.

    Args:
        count: Maximum number of images to return.

    Returns:
        Every available image when there are ``count`` or fewer of them
        (including none at all); otherwise a random sample of ``count``
        images.
    """
    candidates = _get_reference_images()

    # Sampling is only needed when there are more images than requested.
    if candidates and len(candidates) > count:
        return random.sample(candidates, count)

    return candidates
92+
93+
94+
async def generate_image(prompt: str) -> bytes:
    """
    Generate an image from a text prompt using reference images.

    This is the main tool function for image generation. Up to two
    reference images chosen by ``_select_reference_images`` are sent ahead
    of the prompt so the model can keep the character's appearance
    consistent.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        Decoded bytes of the first base64 data-URI image found in the
        response. The bytes are returned exactly as delivered by the model;
        no format check or conversion is performed here.

    Raises:
        httpx.HTTPStatusError: If OpenRouter answers with a 4xx/5xx status.
        ValueError: If the response contains no base64 data-URI image.
    """
    logger.info(f"Generating image for prompt: {prompt[:100]}...")

    headers = {
        "Authorization": f"Bearer {settings.openrouter_api_key}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://pippinlovesdot.com",
        "X-Title": "DOT Twitter Bot",
    }

    # Multimodal user message: reference images first, then the instruction.
    content = [
        {"type": "image_url", "image_url": {"url": image_uri}}
        for image_uri in _select_reference_images(2)
    ]
    content.append({"type": "text", "text": prompt})

    payload = {
        "model": IMAGE_MODEL,
        "messages": [
            {"role": "system", "content": IMAGE_SYSTEM_PROMPT},
            {"role": "user", "content": content},
        ],
        # OpenRouter's image generation guide requests image output
        # explicitly through the modalities field.
        "modalities": ["image", "text"],
    }

    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(
            OPENROUTER_URL,
            headers=headers,
            json=payload,
        )
        response.raise_for_status()
        data = response.json()

    # Images arrive in message.images. Use `or` fallbacks instead of .get()
    # defaults so an empty list or an explicit null cannot trigger an
    # IndexError/AttributeError before the ValueError below is reached.
    choices = data.get("choices") or [{}]
    message = choices[0].get("message") or {}
    images = message.get("images") or []

    for image in images:
        image_url = (image.get("image_url") or {}).get("url") or ""

        # Expected shape: "data:image/...;base64,<payload>". partition()
        # tolerates a malformed URI with no comma.
        header, separator, base64_data = image_url.partition(",")
        if header.startswith("data:") and separator:
            image_bytes = base64.b64decode(base64_data)
            logger.info(f"Generated image: {len(image_bytes)} bytes")
            return image_bytes

    raise ValueError(f"No image data in response: {list(message.keys())}")

tools/registry.py

Lines changed: 47 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,27 +5,53 @@
55
Add your custom tools here to extend the bot's capabilities.
66
"""
77

8+
from tools.web_search import web_search
9+
from tools.image_generation import generate_image
10+
811
# Registry of available tools (function references)
9-
# Example: TOOLS = {"web_search": web_search}
10-
TOOLS = {}
12+
TOOLS = {
13+
"web_search": web_search,
14+
"generate_image": generate_image
15+
}
1116

1217
# JSON Schema definitions for tools (OpenAI function calling format)
13-
# Example schema:
14-
# {
15-
# "type": "function",
16-
# "function": {
17-
# "name": "tool_name",
18-
# "description": "What this tool does",
19-
# "parameters": {
20-
# "type": "object",
21-
# "properties": {
22-
# "param_name": {
23-
# "type": "string",
24-
# "description": "Parameter description"
25-
# }
26-
# },
27-
# "required": ["param_name"]
28-
# }
29-
# }
30-
# }
31-
TOOLS_SCHEMA = []
18+
TOOLS_SCHEMA = [
19+
{
20+
"type": "function",
21+
"function": {
22+
"name": "web_search",
23+
"description": "Search the web for current information. Use this when you need to find recent news, events, prices, facts, or any information that might not be in your training data.",
24+
"parameters": {
25+
"type": "object",
26+
"properties": {
27+
"query": {
28+
"type": "string",
29+
"description": "The search query to look up"
30+
},
31+
"max_results": {
32+
"type": "integer",
33+
"description": "Maximum number of search results (1-10, default 5)"
34+
}
35+
},
36+
"required": ["query"]
37+
}
38+
}
39+
},
40+
{
41+
"type": "function",
42+
"function": {
43+
"name": "generate_image",
44+
"description": "Generate an image based on a text description. Uses reference images from assets folder for consistent character appearance.",
45+
"parameters": {
46+
"type": "object",
47+
"properties": {
48+
"prompt": {
49+
"type": "string",
50+
"description": "Text description of the image to generate"
51+
}
52+
},
53+
"required": ["prompt"]
54+
}
55+
}
56+
}
57+
]

0 commit comments

Comments
 (0)